From 0ec028fe105b9bcdda4196cc8803205a1577c12a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 15 Sep 2022 14:01:27 +0100 Subject: [PATCH] [CostModel][X86] Add CostKinds handling for vector shift by uniform/constuniform ops Vector shift by const uniform is the cheapest shift instruction we have, non-const uniform have a marginally higher cost - some targets 'splat' the amount internally to use the shift-per-element instruction, others see a higher cost for the explicit zeroing of the upper bits for the (64-bit) shift amount. This was achieved with an updated version of the 'cost-tables vs llvm-mca' script D103695 (I'll update the patch soon for reference) --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 319 +++++++-- llvm/test/Analysis/CostModel/X86/arith-fix.ll | 184 ++--- llvm/test/Analysis/CostModel/X86/arith-overflow.ll | 208 +++--- llvm/test/Analysis/CostModel/X86/div-codesize.ll | 184 +++-- llvm/test/Analysis/CostModel/X86/div-latency.ll | 231 +++++-- .../test/Analysis/CostModel/X86/div-sizelatency.ll | 188 +++-- llvm/test/Analysis/CostModel/X86/div.ll | 190 +++--- llvm/test/Analysis/CostModel/X86/fshl.ll | 460 ++++++------- llvm/test/Analysis/CostModel/X86/fshr.ll | 460 ++++++------- .../Analysis/CostModel/X86/intrinsic-cost-kinds.ll | 6 +- llvm/test/Analysis/CostModel/X86/mul.ll | 46 +- llvm/test/Analysis/CostModel/X86/rem-codesize.ll | 112 +-- llvm/test/Analysis/CostModel/X86/rem-latency.ll | 120 ++-- .../test/Analysis/CostModel/X86/rem-sizelatency.ll | 116 ++-- llvm/test/Analysis/CostModel/X86/rem.ll | 136 ++-- .../CostModel/X86/uniformshift-inseltpoison.ll | 6 +- llvm/test/Analysis/CostModel/X86/uniformshift.ll | 6 +- .../Analysis/CostModel/X86/vshift-ashr-codesize.ll | 562 +++++++++++---- .../CostModel/X86/vshift-ashr-cost-inseltpoison.ll | 450 +++++++----- .../Analysis/CostModel/X86/vshift-ashr-cost.ll | 450 +++++++----- .../Analysis/CostModel/X86/vshift-ashr-latency.ll | 758 ++++++++++++++++----- .../CostModel/X86/vshift-ashr-sizelatency.ll | 588 ++++++++++++---- .../Analysis/CostModel/X86/vshift-lshr-codesize.ll | 490 ++++++++++--- .../CostModel/X86/vshift-lshr-cost-inseltpoison.ll | 380 ++++++----- .../Analysis/CostModel/X86/vshift-lshr-cost.ll | 380 ++++++----- .../Analysis/CostModel/X86/vshift-lshr-latency.ll | 754 ++++++++++++++------ .../CostModel/X86/vshift-lshr-sizelatency.ll | 532 +++++++++++---- .../Analysis/CostModel/X86/vshift-shl-codesize.ll | 494 ++++++++++---- .../CostModel/X86/vshift-shl-cost-inseltpoison.ll | 400 +++++------ .../test/Analysis/CostModel/X86/vshift-shl-cost.ll | 400 +++++------ .../Analysis/CostModel/X86/vshift-shl-latency.ll | 740 +++++++++++++------- .../CostModel/X86/vshift-shl-sizelatency.ll | 530 ++++++++++---- .../Transforms/LoopVectorize/X86/uniformshift.ll | 4 +- 33 files changed, 7095 insertions(+), 3789 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index d7e00a1..8e2c801 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -329,16 +329,22 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( } static const CostKindTblEntry AVX512BWUniformConstCostTable[] = { - { ISD::SHL, MVT::v64i8, { 2 } }, // psllw + pand. - { ISD::SRL, MVT::v64i8, { 2 } }, // psrlw + pand. - { ISD::SRA, MVT::v64i8, { 4 } }, // psrlw, pand, pxor, psubb. - - { ISD::SHL, MVT::v16i16, { 1 } }, // psllw - { ISD::SRL, MVT::v16i16, { 1 } }, // psrlw - { ISD::SRA, MVT::v16i16, { 1 } }, // psrlw - { ISD::SHL, MVT::v32i16, { 1 } }, // psllw - { ISD::SRL, MVT::v32i16, { 1 } }, // psrlw - { ISD::SRA, MVT::v32i16, { 1 } }, // psrlw + { ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } }, // psllw + pand. + { ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } }, // psrlw, pand, pxor, psubb. + { ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } }, // psllw + pand. + { ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } }, // psrlw + pand. + { ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } }, // psrlw, pand, pxor, psubb. + { ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } }, // psllw + pand. + { ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } }, // psrlw + pand. + { ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } }, // psrlw, pand, pxor, psubb. + + { ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } }, // psllw + { ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } }, // psrlw + { ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } }, // psrlw + { ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } }, // psllw + { ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } }, // psrlw + { ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } }, // psrlw }; if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasBWI()) @@ -348,18 +354,33 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( return LT.first * KindCost.value(); static const CostKindTblEntry AVX512UniformConstCostTable[] = { - { ISD::SRA, MVT::v2i64, { 1 } }, - { ISD::SRA, MVT::v4i64, { 1 } }, - { ISD::SRA, MVT::v8i64, { 1 } }, - - { ISD::SHL, MVT::v64i8, { 4 } }, // psllw + pand. - { ISD::SRL, MVT::v64i8, { 4 } }, // psrlw + pand. - { ISD::SRA, MVT::v64i8, { 8 } }, // psrlw, pand, pxor, psubb. - - { ISD::SDIV, MVT::v16i32, { 6 } }, // pmuludq sequence - { ISD::SREM, MVT::v16i32, { 8 } }, // pmuludq+mul+sub sequence - { ISD::UDIV, MVT::v16i32, { 5 } }, // pmuludq sequence - { ISD::UREM, MVT::v16i32, { 7 } }, // pmuludq+mul+sub sequence + { ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } }, // psllw + pand. + { ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } }, // psrlw + pand. + { ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } }, // psrlw, pand, pxor, psubb. + + { ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } }, // psllw + split. + { ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } }, // psrlw + split. + { ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } }, // psraw + split. + + { ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } }, // pslld + { ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } }, // psrld + { ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } }, // psrad + { ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } }, // pslld + { ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } }, // psrld + { ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } }, // psrad + + { ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } }, // psraq + { ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } }, // psllq + { ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } }, // psrlq + { ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } }, // psraq + { ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } }, // psllq + { ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } }, // psrlq + { ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } }, // psraq + + { ISD::SDIV, MVT::v16i32, { 6 } }, // pmuludq sequence + { ISD::SREM, MVT::v16i32, { 8 } }, // pmuludq+mul+sub sequence + { ISD::UDIV, MVT::v16i32, { 5 } }, // pmuludq sequence + { ISD::UREM, MVT::v16i32, { 7 } }, // pmuludq+mul+sub sequence }; if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasAVX512()) @@ -369,11 +390,33 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( return LT.first * KindCost.value(); static const CostKindTblEntry AVX2UniformConstCostTable[] = { - { ISD::SHL, MVT::v32i8, { 2 } }, // psllw + pand. - { ISD::SRL, MVT::v32i8, { 2 } }, // psrlw + pand. - { ISD::SRA, MVT::v32i8, { 4 } }, // psrlw, pand, pxor, psubb. - - { ISD::SRA, MVT::v4i64, { 4 } }, // 2 x psrad + shuffle. + { ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } }, // psllw + pand. + { ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } }, // psrlw, pand, pxor, psubb. + { ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } }, // psllw + pand. + { ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } }, // psrlw + pand. + { ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } }, // psrlw, pand, pxor, psubb. + + { ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } }, // psllw + { ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } }, // psrlw + { ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } }, // psraw + { ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } }, // psllw + { ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } }, // psrlw + { ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } }, // psraw + + { ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } }, // pslld + { ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } }, // psrld + { ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } }, // psrad + { ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } }, // pslld + { ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } }, // psrld + { ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } }, // psrad + + { ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } }, // psllq + { ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } }, // psrlq + { ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } }, // psrad + shuffle. + { ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } }, // psllq + { ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } }, // psrlq + { ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } }, // psrad + shuffle + split. { ISD::SDIV, MVT::v8i32, { 6 } }, // pmuludq sequence { ISD::SREM, MVT::v8i32, { 8 } }, // pmuludq+mul+sub sequence @@ -387,28 +430,75 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( if (auto KindCost = Entry->Cost[CostKind]) return LT.first * KindCost.value(); + static const CostKindTblEntry AVXUniformConstCostTable[] = { + { ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } }, // psllw + pand. + { ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } }, // psrlw, pand, pxor, psubb. + { ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } }, // 2*(psllw + pand) + split. + { ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } }, // 2*(psrlw + pand) + split. + { ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } }, // 2*(psrlw, pand, pxor, psubb) + split. + + { ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } }, // psllw. + { ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } }, // psrlw. + { ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } }, // psraw. + { ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } }, // psllw + split. + { ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } }, // psrlw + split. + { ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } }, // psraw + split. + + { ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } }, // pslld. + { ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } }, // psrld. + { ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } }, // psrad. + { ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } }, // pslld + split. + { ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } }, // psrld + split. + { ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } }, // psrad + split. + + { ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } }, // psllq. + { ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } }, // psrlq. + { ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } }, // psrad + shuffle. + { ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } }, // 2 x psllq + split. + { ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } }, // 2 x psllq + split. + { ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } }, // 2 x psrad + shuffle + split. + + { ISD::SDIV, MVT::v8i32, { 14 } }, // 2*pmuludq sequence + split. + { ISD::SREM, MVT::v8i32, { 18 } }, // 2*pmuludq+mul+sub sequence + split. + { ISD::UDIV, MVT::v8i32, { 12 } }, // 2*pmuludq sequence + split. + { ISD::UREM, MVT::v8i32, { 16 } }, // 2*pmuludq+mul+sub sequence + split. + }; + + // XOP has faster vXi8 shifts. + if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasAVX() && + (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8)) + if (const auto *Entry = + CostTableLookup(AVXUniformConstCostTable, ISD, LT.second)) + if (auto KindCost = Entry->Cost[CostKind]) + return LT.first * KindCost.value(); + static const CostKindTblEntry SSE2UniformConstCostTable[] = { - { ISD::SHL, MVT::v16i8, { 2 } }, // psllw + pand. - { ISD::SRL, MVT::v16i8, { 2 } }, // psrlw + pand. - { ISD::SRA, MVT::v16i8, { 4 } }, // psrlw, pand, pxor, psubb. - - { ISD::SHL, MVT::v32i8, { 4+2 } }, // 2*(psllw + pand) + split. - { ISD::SRL, MVT::v32i8, { 4+2 } }, // 2*(psrlw + pand) + split. - { ISD::SRA, MVT::v32i8, { 8+2 } }, // 2*(psrlw, pand, pxor, psubb) + split. - - { ISD::SDIV, MVT::v8i32, { 12+2 } }, // 2*pmuludq sequence + split. - { ISD::SREM, MVT::v8i32, { 16+2 } }, // 2*pmuludq+mul+sub sequence + split. - { ISD::SDIV, MVT::v4i32, { 6 } }, // pmuludq sequence - { ISD::SREM, MVT::v4i32, { 8 } }, // pmuludq+mul+sub sequence - { ISD::UDIV, MVT::v8i32, { 10+2 } }, // 2*pmuludq sequence + split. - { ISD::UREM, MVT::v8i32, { 14+2 } }, // 2*pmuludq+mul+sub sequence + split. - { ISD::UDIV, MVT::v4i32, { 5 } }, // pmuludq sequence - { ISD::UREM, MVT::v4i32, { 7 } }, // pmuludq+mul+sub sequence + { ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } }, // psllw + pand. + { ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } }, // psrlw, pand, pxor, psubb. + + { ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } }, // psllw. + { ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } }, // psrlw. + { ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } }, // psraw. + + { ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } }, // pslld + { ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } }, // psrld. + { ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } }, // psrad. + + { ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } }, // psllq. + { ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } }, // psrlq. + { ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } }, // 2 x psrad + shuffle. + + { ISD::SDIV, MVT::v4i32, { 6 } }, // pmuludq sequence + { ISD::SREM, MVT::v4i32, { 8 } }, // pmuludq+mul+sub sequence + { ISD::UDIV, MVT::v4i32, { 5 } }, // pmuludq sequence + { ISD::UREM, MVT::v4i32, { 7 } }, // pmuludq+mul+sub sequence }; // XOP has faster vXi8 shifts. if (Op2Info.isUniform() && Op2Info.isConstant() && ST->hasSSE2() && - !ST->hasXOP()) + (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8)) if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second)) if (auto KindCost = Entry->Cost[CostKind]) @@ -525,10 +615,44 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( if (auto KindCost = Entry->Cost[CostKind]) return LT.first * KindCost.value(); + static const CostKindTblEntry AVX512BWUniformCostTable[] = { + { ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } }, // psllw + pand. + { ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, { 4,12, 8,12 } }, // psrlw, pand, pxor, psubb. + { ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } }, // psllw + pand. + { ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } }, // psrlw + pand. + { ISD::SRA, MVT::v32i8, { 5,10,10,13 } }, // psrlw, pand, pxor, psubb. + { ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } }, // psllw + pand. + { ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } }, // psrlw + pand. + { ISD::SRA, MVT::v64i8, { 5,10,10,15 } }, // psrlw, pand, pxor, psubb. + + { ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } }, // psllw + { ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } }, // psrlw + { ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } }, // psrqw + }; + + if (ST->hasBWI() && Op2Info.isUniform()) + if (const auto *Entry = + CostTableLookup(AVX512BWUniformCostTable, ISD, LT.second)) + if (auto KindCost = Entry->Cost[CostKind]) + return LT.first * KindCost.value(); + static const CostKindTblEntry AVX512UniformCostTable[] = { - { ISD::SRA, MVT::v2i64, { 1 } }, - { ISD::SRA, MVT::v4i64, { 1 } }, - { ISD::SRA, MVT::v8i64, { 1 } }, + { ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } }, // psllw + split. + { ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } }, // psrlw + split. + { ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } }, // psraw + split. + + { ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } }, // pslld + { ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } }, // psrld + { ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } }, // psrad + + { ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } }, // psraq + { ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } }, // psllq + { ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } }, // psrlq + { ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } }, // psraq + { ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } }, // psllq + { ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } }, // psrlq + { ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } }, // psraq }; if (ST->hasAVX512() && Op2Info.isUniform()) @@ -539,18 +663,33 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( static const CostKindTblEntry AVX2UniformCostTable[] = { // Uniform splats are cheaper for the following instructions. - { ISD::SHL, MVT::v16i16, { 1 } }, // psllw. - { ISD::SRL, MVT::v16i16, { 1 } }, // psrlw. - { ISD::SRA, MVT::v16i16, { 1 } }, // psraw. - { ISD::SHL, MVT::v32i16, { 2 } }, // 2*psllw. - { ISD::SRL, MVT::v32i16, { 2 } }, // 2*psrlw. - { ISD::SRA, MVT::v32i16, { 2 } }, // 2*psraw. - - { ISD::SHL, MVT::v8i32, { 1 } }, // pslld - { ISD::SRL, MVT::v8i32, { 1 } }, // psrld - { ISD::SRA, MVT::v8i32, { 1 } }, // psrad - { ISD::SHL, MVT::v4i64, { 1 } }, // psllq - { ISD::SRL, MVT::v4i64, { 1 } }, // psrlq + { ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } }, // psllw + pand. + { ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } }, // psrlw, pand, pxor, psubb. + { ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } }, // psllw + pand. + { ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } }, // psrlw + pand. + { ISD::SRA, MVT::v32i8, { 6, 9,11,16 } }, // psrlw, pand, pxor, psubb. + + { ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } }, // psllw. + { ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } }, // psrlw. + { ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } }, // psraw. + { ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } }, // psllw. + { ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } }, // psrlw. + { ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } }, // psraw. + + { ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } }, // pslld + { ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } }, // psrld + { ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } }, // psrad + { ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } }, // pslld + { ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } }, // psrld + { ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } }, // psrad + + { ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } }, // psllq + { ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } }, // psrlq + { ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } }, // 2 x psrad + shuffle. + { ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } }, // psllq + { ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } }, // psrlq + { ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } }, // 2 x psrad + shuffle. { ISD::SRA, MVT::v4i64, { 4 } }, // 2*psrad + shuffle. }; @@ -560,21 +699,65 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( if (auto KindCost = Entry->Cost[CostKind]) return LT.first * KindCost.value(); + static const CostKindTblEntry AVXUniformCostTable[] = { + { ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } }, // psllw + pand. + { ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } }, // psrlw, pand, pxor, psubb. + { ISD::SHL, MVT::v32i8, { 7, 8,11,14 } }, // psllw + pand + split. + { ISD::SRL, MVT::v32i8, { 7, 9,10,14 } }, // psrlw + pand + split. + { ISD::SRA, MVT::v32i8, { 10,11,16,21 } }, // psrlw, pand, pxor, psubb + split. + + { ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } }, // psllw. + { ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } }, // psrlw. + { ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } }, // psraw. + { ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } }, // psllw + split. + { ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } }, // psrlw + split. + { ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } }, // psraw + split. + + { ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } }, // pslld. + { ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } }, // psrld. + { ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } }, // psrad. + { ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } }, // pslld + split. + { ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } }, // psrld + split. + { ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } }, // psrad + split. + + { ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } }, // psllq. + { ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } }, // psrlq. + { ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } }, // 2 x psrad + shuffle. + { ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } }, // psllq + split. + { ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } }, // psrlq + split. + { ISD::SRA, MVT::v4i64, { 6, 7,10,13 } }, // 2 x (2 x psrad + shuffle) + split. + }; + + // XOP has faster vXi8 shifts. + if (ST->hasAVX() && Op2Info.isUniform() && + (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8)) + if (const auto *Entry = + CostTableLookup(AVXUniformCostTable, ISD, LT.second)) + if (auto KindCost = Entry->Cost[CostKind]) + return LT.first * KindCost.value(); + static const CostKindTblEntry SSE2UniformCostTable[] = { // Uniform splats are cheaper for the following instructions. - { ISD::SHL, MVT::v8i16, { 1 } }, // psllw. - { ISD::SHL, MVT::v4i32, { 1 } }, // pslld - { ISD::SHL, MVT::v2i64, { 1 } }, // psllq. + { ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } }, // psllw + pand. + { ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } }, // pcmpgtb sequence. - { ISD::SRL, MVT::v8i16, { 1 } }, // psrlw. - { ISD::SRL, MVT::v4i32, { 1 } }, // psrld. - { ISD::SRL, MVT::v2i64, { 1 } }, // psrlq. + { ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } }, // psllw. + { ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } }, // psrlw. + { ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } }, // psraw. - { ISD::SRA, MVT::v8i16, { 1 } }, // psraw. - { ISD::SRA, MVT::v4i32, { 1 } }, // psrad. + { ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } }, // pslld + { ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } }, // psrld. + { ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } }, // psrad. + + { ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } }, // psllq. + { ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } }, // psrlq. + { ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } }, // 2*psrlq + xor + sub. }; - if (ST->hasSSE2() && Op2Info.isUniform()) + if (ST->hasSSE2() && Op2Info.isUniform() && + (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8)) if (const auto *Entry = CostTableLookup(SSE2UniformCostTable, ISD, LT.second)) if (auto KindCost = Entry->Cost[CostKind]) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fix.ll b/llvm/test/Analysis/CostModel/X86/arith-fix.ll index 1febfee..31ca29a 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fix.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fix.ll @@ -46,9 +46,9 @@ define i32 @smul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'smul' @@ -65,45 +65,45 @@ define i32 @smul(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'smul' ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'smul' ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -119,12 +119,12 @@ define i32 @smul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'smul' @@ -141,9 +141,9 @@ define i32 @smul(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'smul' @@ -157,12 +157,12 @@ define i32 @smul(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'smul' @@ -179,9 +179,9 @@ define i32 @smul(i32 %arg) { ; SLM-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'smul' @@ -198,28 +198,28 @@ define i32 @smul(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'smul' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) @@ -280,9 +280,9 @@ define i32 @umul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'umul' @@ -299,45 +299,45 @@ define i32 @umul(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'umul' ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'umul' ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -353,12 +353,12 @@ define i32 @umul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'umul' @@ -375,9 +375,9 @@ define i32 @umul(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'umul' @@ -391,12 +391,12 @@ define i32 @umul(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'umul' @@ -413,9 +413,9 @@ define i32 @umul(i32 %arg) { ; SLM-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'umul' @@ -432,28 +432,28 @@ define i32 @umul(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'umul' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index 8ec7d48..c282c4a 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -990,9 +990,9 @@ declare {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x define i32 @smul(i32 %arg) { ; SSSE3-LABEL: 'smul' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1002,16 +1002,16 @@ define i32 @smul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'smul' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1021,47 +1021,47 @@ define i32 @smul(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'smul' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'smul' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'smul' @@ -1075,12 +1075,12 @@ define i32 @smul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'smul' @@ -1097,9 +1097,9 @@ define i32 @smul(i32 %arg) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'smul' @@ -1113,19 +1113,19 @@ define i32 @smul(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'smul' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1135,16 +1135,16 @@ define i32 @smul(i32 %arg) { ; SLM-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'smul' ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -1154,28 +1154,28 @@ define i32 @smul(i32 %arg) { ; GLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'smul' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef) @@ -1270,17 +1270,17 @@ define i32 @umul(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'umul' @@ -1289,17 +1289,17 @@ define i32 @umul(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'umul' @@ -1316,9 +1316,9 @@ define i32 @umul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'umul' @@ -1354,9 +1354,9 @@ define i32 @umul(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'umul' @@ -1403,17 +1403,17 @@ define i32 @umul(i32 %arg) { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/X86/div-codesize.ll b/llvm/test/Analysis/CostModel/X86/div-codesize.ll index f5045a5..0966b52 100644 --- a/llvm/test/Analysis/CostModel/X86/div-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/div-codesize.ll @@ -434,59 +434,59 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE-LABEL: 'sdiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = sdiv <64 x i8> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4i64 = sdiv <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8i64 = sdiv <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8i32 = sdiv <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sdiv <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = sdiv <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' @@ -500,12 +500,12 @@ define i32 @sdiv_uniformconstpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' @@ -522,9 +522,9 @@ define i32 @sdiv_uniformconstpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -551,24 +551,100 @@ define i32 @sdiv_uniformconstpow2() { } define i32 @udiv_uniformconstpow2() { -; CHECK-LABEL: 'udiv_uniformconstpow2' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; SSE-LABEL: 'udiv_uniformconstpow2' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX1-LABEL: 'udiv_uniformconstpow2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX2-LABEL: 'udiv_uniformconstpow2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512F-LABEL: 'udiv_uniformconstpow2' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512BW-LABEL: 'udiv_uniformconstpow2' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 %V2i64 = udiv <2 x i64> undef, diff --git a/llvm/test/Analysis/CostModel/X86/div-latency.ll b/llvm/test/Analysis/CostModel/X86/div-latency.ll index d67fcc0..f891c7c 100644 --- a/llvm/test/Analysis/CostModel/X86/div-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/div-latency.ll @@ -453,59 +453,59 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE-LABEL: 'sdiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = sdiv <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i32 = sdiv <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' @@ -519,12 +519,12 @@ define i32 @sdiv_uniformconstpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' @@ -541,28 +541,28 @@ define i32 @sdiv_uniformconstpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'sdiv_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -589,24 +589,119 @@ define i32 @sdiv_uniformconstpow2() { } define i32 @udiv_uniformconstpow2() { -; CHECK-LABEL: 'udiv_uniformconstpow2' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; SSE-LABEL: 'udiv_uniformconstpow2' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX1-LABEL: 'udiv_uniformconstpow2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX2-LABEL: 'udiv_uniformconstpow2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512F-LABEL: 'udiv_uniformconstpow2' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512BW-LABEL: 'udiv_uniformconstpow2' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLM-LABEL: 'udiv_uniformconstpow2' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 %V2i64 = udiv <2 x i64> undef, diff --git a/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll index 8cb9177..2eaff0f 100644 --- a/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll @@ -434,59 +434,59 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE-LABEL: 'sdiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64i8 = sdiv <64 x i8> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4i64 = sdiv <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8i64 = sdiv <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8i32 = sdiv <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' @@ -500,12 +500,12 @@ define i32 @sdiv_uniformconstpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' @@ -522,9 +522,9 @@ define i32 @sdiv_uniformconstpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -551,24 +551,100 @@ define i32 @sdiv_uniformconstpow2() { } define i32 @udiv_uniformconstpow2() { -; CHECK-LABEL: 'udiv_uniformconstpow2' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; SSE-LABEL: 'udiv_uniformconstpow2' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX1-LABEL: 'udiv_uniformconstpow2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX2-LABEL: 'udiv_uniformconstpow2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512F-LABEL: 'udiv_uniformconstpow2' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; AVX512BW-LABEL: 'udiv_uniformconstpow2' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 %V2i64 = udiv <2 x i64> undef, diff --git a/llvm/test/Analysis/CostModel/X86/div.ll b/llvm/test/Analysis/CostModel/X86/div.ll index 4ef5e95..75d03f5 100644 --- a/llvm/test/Analysis/CostModel/X86/div.ll +++ b/llvm/test/Analysis/CostModel/X86/div.ll @@ -1099,9 +1099,9 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE2-LABEL: 'sdiv_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, @@ -1111,16 +1111,16 @@ define i32 @sdiv_uniformconstpow2() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sdiv_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, @@ -1130,16 +1130,16 @@ define i32 @sdiv_uniformconstpow2() { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sdiv_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, @@ -1149,47 +1149,47 @@ define i32 @sdiv_uniformconstpow2() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = sdiv <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = sdiv <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i32 = sdiv <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' @@ -1203,12 +1203,12 @@ define i32 @sdiv_uniformconstpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' @@ -1225,16 +1225,16 @@ define i32 @sdiv_uniformconstpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'sdiv_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, @@ -1244,28 +1244,28 @@ define i32 @sdiv_uniformconstpow2() { ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sdiv_uniformconstpow2' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = sdiv <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = sdiv <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = sdiv <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = sdiv <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i32 = sdiv <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i32 = sdiv <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = sdiv <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -1306,45 +1306,45 @@ define i32 @udiv_uniformconstpow2() { ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = udiv <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = udiv <16 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -1360,12 +1360,12 @@ define i32 @udiv_uniformconstpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconstpow2' @@ -1382,28 +1382,28 @@ define i32 @udiv_uniformconstpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'udiv_uniformconstpow2' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = udiv <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = udiv <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = udiv <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = udiv <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = udiv <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 diff --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll index 011b77f..eae376c 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl.ll @@ -312,18 +312,18 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i64' @@ -331,8 +331,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i64' @@ -340,8 +340,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatvar_funnel_i64' @@ -357,18 +357,18 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_funnel_i64' @@ -376,8 +376,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; XOP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; XOP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; XOP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; XOP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer @@ -394,9 +394,9 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i32' @@ -404,8 +404,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i32' @@ -413,8 +413,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatvar_funnel_i32' @@ -422,26 +422,26 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatvar_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_funnel_i32' @@ -449,8 +449,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; XOP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; XOP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; XOP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; XOP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer @@ -467,9 +467,9 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i16' @@ -477,8 +477,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i16' @@ -486,8 +486,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i16' @@ -495,8 +495,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i16' @@ -504,7 +504,7 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -513,26 +513,26 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatvar_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_funnel_i16' @@ -540,8 +540,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; XOP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; XOP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; XOP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; XOP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer @@ -554,48 +554,39 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 } define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { -; SSSE3-LABEL: 'splatvar_funnel_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; SSE42-LABEL: 'splatvar_funnel_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; SSE42-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE-LABEL: 'splatvar_funnel_i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -603,17 +594,17 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -621,18 +612,18 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_funnel_i8' @@ -956,15 +947,15 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_funnel_i64' @@ -991,8 +982,8 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) @@ -1013,15 +1004,15 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_funnel_i32' @@ -1048,8 +1039,8 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) ; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) @@ -1070,22 +1061,22 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' @@ -1098,8 +1089,8 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' @@ -1119,8 +1110,8 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) @@ -1133,58 +1124,58 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' @@ -1491,9 +1482,9 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i64' @@ -1501,8 +1492,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i64' @@ -1510,8 +1501,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatvar_rotate_i64' @@ -1527,18 +1518,18 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_rotate_i64' @@ -1564,9 +1555,9 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i32' @@ -1574,8 +1565,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i32' @@ -1583,8 +1574,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatvar_rotate_i32' @@ -1600,18 +1591,18 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_rotate_i32' @@ -1637,9 +1628,9 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i16' @@ -1647,8 +1638,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i16' @@ -1656,8 +1647,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i16' @@ -1665,8 +1656,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatvar_rotate_i16' @@ -1683,26 +1674,26 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatvar_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_rotate_i16' @@ -1724,48 +1715,39 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 } define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { -; SSSE3-LABEL: 'splatvar_rotate_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; SSE42-LABEL: 'splatvar_rotate_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE-LABEL: 'splatvar_rotate_i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -1782,8 +1764,8 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -1791,18 +1773,18 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_rotate_i8' @@ -2112,15 +2094,15 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' @@ -2169,15 +2151,15 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' @@ -2226,22 +2208,22 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' @@ -2254,8 +2236,8 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' @@ -2289,30 +2271,30 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' @@ -2324,23 +2306,23 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, < ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' diff --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll index f9237e0..0d6f4d4 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr.ll @@ -312,18 +312,18 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatvar_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i64' @@ -331,8 +331,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i64' @@ -340,8 +340,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatvar_funnel_i64' @@ -357,18 +357,18 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_funnel_i64' @@ -376,8 +376,8 @@ define void @splatvar_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128) -; XOP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) -; XOP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) +; XOP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256) +; XOP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer @@ -394,9 +394,9 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i32' @@ -404,8 +404,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i32' @@ -413,8 +413,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatvar_funnel_i32' @@ -422,26 +422,26 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatvar_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_funnel_i32' @@ -449,8 +449,8 @@ define void @splatvar_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %u128) -; XOP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) -; XOP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) +; XOP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %u256) +; XOP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %u512) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer @@ -467,9 +467,9 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i16' @@ -477,8 +477,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i16' @@ -486,8 +486,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i16' @@ -495,8 +495,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i16' @@ -504,7 +504,7 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -513,26 +513,26 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatvar_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_funnel_i16' @@ -540,8 +540,8 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %u128) -; XOP-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) -; XOP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) +; XOP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %u256) +; XOP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %u512) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer @@ -554,48 +554,39 @@ define void @splatvar_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 } define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { -; SSSE3-LABEL: 'splatvar_funnel_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; SSE42-LABEL: 'splatvar_funnel_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; SSE42-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE-LABEL: 'splatvar_funnel_i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -603,17 +594,17 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -621,18 +612,18 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_funnel_i8' @@ -956,15 +947,15 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_funnel_i64' @@ -991,8 +982,8 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) @@ -1013,15 +1004,15 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_funnel_i32' @@ -1048,8 +1039,8 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) ; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) @@ -1070,22 +1061,22 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' @@ -1098,8 +1089,8 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' @@ -1119,8 +1110,8 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) @@ -1133,58 +1124,58 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' @@ -1492,9 +1483,9 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i64' @@ -1502,8 +1493,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i64' @@ -1511,8 +1502,8 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatvar_rotate_i64' @@ -1528,18 +1519,18 @@ define void @splatvar_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_rotate_i64' @@ -1565,9 +1556,9 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i32' @@ -1575,8 +1566,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i32' @@ -1584,8 +1575,8 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatvar_rotate_i32' @@ -1601,18 +1592,18 @@ define void @splatvar_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <4 x i32> %c128, <4 x i32> undef, <4 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <8 x i32> %c256, <8 x i32> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <16 x i32> %c512, <16 x i32> undef, <16 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_rotate_i32' @@ -1638,9 +1629,9 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i16' @@ -1648,8 +1639,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i16' @@ -1657,8 +1648,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i16' @@ -1666,8 +1657,8 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatvar_rotate_i16' @@ -1684,26 +1675,26 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatvar_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <8 x i16> %c128, <8 x i16> undef, <8 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <16 x i16> %c256, <16 x i16> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <32 x i16> %c512, <32 x i16> undef, <32 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_rotate_i16' @@ -1725,48 +1716,39 @@ define void @splatvar_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 } define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) { -; SSSE3-LABEL: 'splatvar_rotate_i8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; SSE42-LABEL: 'splatvar_rotate_i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE-LABEL: 'splatvar_rotate_i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; SSE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; SSE-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatvar_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatvar_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -1783,8 +1765,8 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -1792,18 +1774,18 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatvar_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; GLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatvar_rotate_i8' @@ -2113,15 +2095,15 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' @@ -2170,15 +2152,15 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' @@ -2227,22 +2209,22 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' @@ -2255,8 +2237,8 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' @@ -2290,30 +2272,30 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' @@ -2325,23 +2307,23 @@ define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, < ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index 887309a..155d509 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -56,17 +56,17 @@ define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) { ; ; LATE-LABEL: 'umul' ; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'umul' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) -; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb) +; SIZE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'umul' ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b) diff --git a/llvm/test/Analysis/CostModel/X86/mul.ll b/llvm/test/Analysis/CostModel/X86/mul.ll index 0503391..4639b5d 100644 --- a/llvm/test/Analysis/CostModel/X86/mul.ll +++ b/llvm/test/Analysis/CostModel/X86/mul.ll @@ -248,39 +248,39 @@ define i32 @mul_uniformconstpow2() { ; AVX1-LABEL: 'mul_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = mul <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64i8 = mul <64 x i8> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = mul <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstpow2' @@ -294,12 +294,12 @@ define i32 @mul_uniformconstpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = mul <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = mul <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64i8 = mul <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstpow2' diff --git a/llvm/test/Analysis/CostModel/X86/rem-codesize.ll b/llvm/test/Analysis/CostModel/X86/rem-codesize.ll index a80e68a..01b3dc1 100644 --- a/llvm/test/Analysis/CostModel/X86/rem-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/rem-codesize.ll @@ -510,78 +510,78 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8i64 = srem <8 x i64> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' @@ -595,12 +595,12 @@ define i32 @srem_uniformconstpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' @@ -617,28 +617,28 @@ define i32 @srem_uniformconstpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = srem <2 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = srem <4 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8i64 = srem <8 x i64> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 diff --git a/llvm/test/Analysis/CostModel/X86/rem-latency.ll b/llvm/test/Analysis/CostModel/X86/rem-latency.ll index f1bb531..207ac86 100644 --- a/llvm/test/Analysis/CostModel/X86/rem-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/rem-latency.ll @@ -510,78 +510,78 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16i32 = srem <16 x i32> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V64i8 = srem <64 x i8> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V64i8 = srem <64 x i8> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' @@ -595,12 +595,12 @@ define i32 @srem_uniformconstpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' @@ -617,28 +617,28 @@ define i32 @srem_uniformconstpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V2i64 = srem <2 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4i64 = srem <4 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V8i64 = srem <8 x i64> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V16i8 = srem <16 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32i8 = srem <32 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V64i8 = srem <64 x i8> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 diff --git a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll index 3b3753c..e7768fa 100644 --- a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll @@ -510,78 +510,78 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' @@ -595,12 +595,12 @@ define i32 @srem_uniformconstpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' @@ -617,28 +617,28 @@ define i32 @srem_uniformconstpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i32 = srem <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 diff --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll index 2154e97..a9e8815 100644 --- a/llvm/test/Analysis/CostModel/X86/rem.ll +++ b/llvm/test/Analysis/CostModel/X86/rem.ll @@ -1099,9 +1099,9 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8i64 = srem <8 x i64> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1111,16 +1111,16 @@ define i32 @srem_uniformconstpow2() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'srem_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4i64 = srem <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8i64 = srem <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i64 = srem <4 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8i64 = srem <8 x i64> undef, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1130,16 +1130,16 @@ define i32 @srem_uniformconstpow2() { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = srem <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = srem <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1149,47 +1149,47 @@ define i32 @srem_uniformconstpow2() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = srem <8 x i64> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' @@ -1203,12 +1203,12 @@ define i32 @srem_uniformconstpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' @@ -1225,16 +1225,16 @@ define i32 @srem_uniformconstpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = srem <2 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = srem <4 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = srem <8 x i64> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1244,16 +1244,16 @@ define i32 @srem_uniformconstpow2() { ; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64i8 = srem <64 x i8> undef, ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'srem_uniformconstpow2' ; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i64 = srem <4 x i64> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8i64 = srem <8 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, ; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, @@ -1263,28 +1263,28 @@ define i32 @srem_uniformconstpow2() { ; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, ; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = srem <32 x i8> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = srem <64 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_uniformconstpow2' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i64 = srem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8i32 = srem <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16i32 = srem <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i16 = srem <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = srem <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = srem <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i8 = srem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32i8 = srem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V64i8 = srem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i8 = srem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V64i8 = srem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 diff --git a/llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll index 2020da3..2cad2e6 100644 --- a/llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll @@ -4,7 +4,7 @@ define <4 x i32> @shl(<4 x i32> %vector, i32 %scalar) { entry: ; SSE2: 'shl' - ; SSE2: cost of 1 {{.*}} shl + ; SSE2: cost of 2 {{.*}} shl ; SSE2-CODEGEN: movd %edi, %xmm1 ; SSE2-CODEGEN: pslld %xmm1, %xmm0 %insert = insertelement <4 x i32> poison, i32 %scalar, i32 0 @@ -16,7 +16,7 @@ entry: define <4 x i32> @ashr(<4 x i32> %vector, i32 %scalar) { entry: ; SSE2: 'ashr' - ; SSE2: cost of 1 {{.*}} ashr + ; SSE2: cost of 2 {{.*}} ashr ; SSE2-CODEGEN: movd %edi, %xmm1 ; SSE2-CODEGEN: psrad %xmm1, %xmm0 %insert = insertelement <4 x i32> poison, i32 %scalar, i32 0 @@ -28,7 +28,7 @@ entry: define <4 x i32> @lshr(<4 x i32> %vector, i32 %scalar) { entry: ; SSE2: 'lshr' - ; SSE2: cost of 1 {{.*}} lshr + ; SSE2: cost of 2 {{.*}} lshr ; SSE2-CODEGEN: movd %edi, %xmm1 ; SSE2-CODEGEN: psrld %xmm1, %xmm0 %insert = insertelement <4 x i32> poison, i32 %scalar, i32 0 diff --git a/llvm/test/Analysis/CostModel/X86/uniformshift.ll b/llvm/test/Analysis/CostModel/X86/uniformshift.ll index 8b68886..e9c2e2a 100644 --- a/llvm/test/Analysis/CostModel/X86/uniformshift.ll +++ b/llvm/test/Analysis/CostModel/X86/uniformshift.ll @@ -4,7 +4,7 @@ define <4 x i32> @shl(<4 x i32> %vector, i32 %scalar) { entry: ; SSE2: 'shl' - ; SSE2: cost of 1 {{.*}} shl + ; SSE2: cost of 2 {{.*}} shl ; SSE2-CODEGEN: movd %edi, %xmm1 ; SSE2-CODEGEN: pslld %xmm1, %xmm0 %insert = insertelement <4 x i32> undef, i32 %scalar, i32 0 @@ -16,7 +16,7 @@ entry: define <4 x i32> @ashr(<4 x i32> %vector, i32 %scalar) { entry: ; SSE2: 'ashr' - ; SSE2: cost of 1 {{.*}} ashr + ; SSE2: cost of 2 {{.*}} ashr ; SSE2-CODEGEN: movd %edi, %xmm1 ; SSE2-CODEGEN: psrad %xmm1, %xmm0 %insert = insertelement <4 x i32> undef, i32 %scalar, i32 0 @@ -28,7 +28,7 @@ entry: define <4 x i32> @lshr(<4 x i32> %vector, i32 %scalar) { entry: ; SSE2: 'lshr' - ; SSE2: cost of 1 {{.*}} lshr + ; SSE2: cost of 2 {{.*}} lshr ; SSE2-CODEGEN: movd %edi, %xmm1 ; SSE2-CODEGEN: psrld %xmm1, %xmm0 %insert = insertelement <4 x i32> undef, i32 %scalar, i32 0 diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll index d6758e9..2e92fa2 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll @@ -5,12 +5,12 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42 @@ -138,25 +138,25 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatvar_shift_v2i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v2i64' @@ -175,37 +175,37 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' @@ -217,7 +217,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -230,37 +230,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' @@ -272,7 +272,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -322,49 +322,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -377,49 +377,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -469,49 +469,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 @@ -524,49 +524,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 @@ -579,32 +585,44 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatvar_shift_v16i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift -; -; AVX512-LABEL: 'splatvar_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512F-LABEL: 'splatvar_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer @@ -616,25 +634,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -646,19 +664,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -671,25 +695,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -701,19 +725,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -839,27 +869,99 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v2i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v2i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX-LABEL: 'splatconstant_shift_v2i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'splatconstant_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, ret <2 x i64> %shift } define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v4i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, ret <4 x i64> %shift } define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, ret <8 x i64> %shift @@ -875,18 +977,74 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { } define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, ret <8 x i32> %shift } define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, ret <16 x i32> %shift @@ -902,45 +1060,197 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { } define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, ret <16 x i16> %shift } define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, ret <32 x i16> %shift } define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatconstant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v64i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll index 2bcadd0..78d03b54 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll @@ -482,26 +482,38 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; -; AVX-LABEL: 'splatvar_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; AVX1-LABEL: 'splatvar_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; -; XOP-LABEL: 'splatvar_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; AVX2-LABEL: 'splatvar_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v2i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v2i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v2i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 @@ -509,6 +521,12 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; +; BTVER2-LABEL: 'splatvar_shift_v2i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; %insert = insertelement <2 x i64> poison, i64 %b, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer %shift = ashr <2 x i64> %a, %splat @@ -519,19 +537,19 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' @@ -561,7 +579,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> poison, i64 %b, i32 0 @@ -574,19 +592,19 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' @@ -616,7 +634,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> poison, i64 %b, i32 0 @@ -629,13 +647,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' @@ -666,49 +684,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> poison, i32 %b, i32 0 @@ -721,49 +739,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> poison, i32 %b, i32 0 @@ -776,13 +794,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX-LABEL: 'splatvar_shift_v8i16' @@ -813,49 +831,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> poison, i16 %b, i32 0 @@ -868,49 +886,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> poison, i16 %b, i32 0 @@ -923,61 +959,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512F-LABEL: 'splatvar_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BW-LABEL: 'splatvar_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512VL-LABEL: 'splatvar_shift_v16i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> poison, i8 %b, i32 0 @@ -990,25 +1014,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -1026,31 +1050,31 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; AVX512F-LABEL: 'splatvar_shift_v32i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v32i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v32i8' ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> poison, i8 %b, i32 0 @@ -1063,25 +1087,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1105,7 +1129,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' @@ -1117,13 +1141,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %shift = ashr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> poison, i8 %b, i32 0 @@ -1594,15 +1618,15 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v2i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' @@ -1615,27 +1639,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; -; XOP-LABEL: 'splatconstant_shift_v4i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1644,27 +1672,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; -; XOP-LABEL: 'splatconstant_shift_v8i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1686,19 +1718,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' @@ -1706,7 +1738,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1719,19 +1751,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' @@ -1739,7 +1771,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1761,27 +1793,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v16i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1794,23 +1838,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' @@ -1818,7 +1862,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' @@ -1826,7 +1870,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1835,20 +1879,44 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatconstant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatconstant_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX2-LABEL: 'splatconstant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, ret <16 x i8> %shift @@ -1856,27 +1924,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; XOP-LABEL: 'splatconstant_shift_v32i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1885,39 +1969,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; -; XOP-LABEL: 'splatconstant_shift_v64i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index 2acff1c..6ac9849 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -482,26 +482,38 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; -; AVX-LABEL: 'splatvar_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; AVX1-LABEL: 'splatvar_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; -; XOP-LABEL: 'splatvar_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; AVX2-LABEL: 'splatvar_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v2i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v2i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v2i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -509,6 +521,12 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; +; BTVER2-LABEL: 'splatvar_shift_v2i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer %shift = ashr <2 x i64> %a, %splat @@ -519,19 +537,19 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' @@ -561,7 +579,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -574,19 +592,19 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' @@ -616,7 +634,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -629,13 +647,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' @@ -666,49 +684,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -721,49 +739,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -776,13 +794,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX-LABEL: 'splatvar_shift_v8i16' @@ -813,49 +831,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 @@ -868,49 +886,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 @@ -923,61 +959,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512F-LABEL: 'splatvar_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BW-LABEL: 'splatvar_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512VL-LABEL: 'splatvar_shift_v16i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -990,25 +1014,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -1026,31 +1050,31 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; AVX512F-LABEL: 'splatvar_shift_v32i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v32i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v32i8' ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -1063,25 +1087,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1105,7 +1129,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' @@ -1117,13 +1141,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %shift = ashr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -1594,15 +1618,15 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v2i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' @@ -1615,27 +1639,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; -; XOP-LABEL: 'splatconstant_shift_v4i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1644,27 +1672,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; -; XOP-LABEL: 'splatconstant_shift_v8i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1686,19 +1718,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' @@ -1706,7 +1738,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1719,19 +1751,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' @@ -1739,7 +1771,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1761,27 +1793,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v16i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1794,23 +1838,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' @@ -1818,7 +1862,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' @@ -1826,7 +1870,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1835,20 +1879,44 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatconstant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatconstant_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX2-LABEL: 'splatconstant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, ret <16 x i8> %shift @@ -1856,27 +1924,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; XOP-LABEL: 'splatconstant_shift_v32i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1885,39 +1969,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; -; XOP-LABEL: 'splatconstant_shift_v64i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll index a6d86e3..1f087f9 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll @@ -5,12 +5,12 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42 @@ -138,31 +138,31 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatvar_shift_v2i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v2i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -175,49 +175,49 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -230,49 +230,49 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -285,33 +285,51 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; -; AVX-LABEL: 'splatvar_shift_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; XOP-LABEL: 'splatvar_shift_v4i32' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; AVX1-LABEL: 'splatvar_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'splatvar_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v4i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v4i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; +; BTVER2-LABEL: 'splatvar_shift_v4i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer %shift = ashr <4 x i32> %a, %splat @@ -322,49 +340,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -377,49 +395,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -432,33 +450,51 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'splatvar_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift -; -; XOP-LABEL: 'splatvar_shift_v8i16' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'splatvar_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'splatvar_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v8i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v8i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; +; BTVER2-LABEL: 'splatvar_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; %insert = insertelement <8 x i16> undef, i16 %b, i32 0 %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer %shift = ashr <8 x i16> %a, %splat @@ -469,49 +505,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 @@ -524,49 +560,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 @@ -579,32 +621,56 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatvar_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift -; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift -; -; AVX512-LABEL: 'splatvar_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatvar_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512F-LABEL: 'splatvar_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer @@ -616,25 +682,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -646,19 +712,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -671,25 +743,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -701,19 +773,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -839,108 +917,452 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v2i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v2i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX-LABEL: 'splatconstant_shift_v2i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'splatconstant_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, ret <2 x i64> %shift } define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v4i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, ret <4 x i64> %shift } define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, ret <8 x i64> %shift } define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v4i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, ret <4 x i32> %shift } define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, ret <8 x i32> %shift } define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, ret <16 x i32> %shift } define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, ret <8 x i16> %shift } define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, ret <16 x i16> %shift } define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, ret <32 x i16> %shift } define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v64i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll index 4918e53..aa148e6 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll @@ -5,12 +5,12 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42 @@ -138,31 +138,31 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <2 x i64> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatvar_shift_v2i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <2 x i64> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v2i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -175,49 +175,49 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -230,49 +230,49 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = ashr <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -285,31 +285,31 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOP-LABEL: 'splatvar_shift_v4i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 @@ -322,49 +322,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -377,49 +377,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -432,31 +432,31 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX-LABEL: 'splatvar_shift_v8i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'splatvar_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %insert = insertelement <8 x i16> undef, i16 %b, i32 0 @@ -469,49 +469,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 @@ -524,49 +524,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 @@ -579,32 +585,44 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatvar_shift_v16i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift -; -; AVX512-LABEL: 'splatvar_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512F-LABEL: 'splatvar_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer @@ -616,25 +634,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -646,19 +664,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -671,25 +695,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -701,19 +725,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %shift = ashr <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -839,27 +869,99 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v2i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v2i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX-LABEL: 'splatconstant_shift_v2i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'splatconstant_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, ret <2 x i64> %shift } define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v4i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, ret <4 x i64> %shift } define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, ret <8 x i64> %shift @@ -875,18 +977,74 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { } define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, ret <8 x i32> %shift } define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, ret <16 x i32> %shift @@ -902,45 +1060,197 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { } define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, ret <16 x i16> %shift } define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, ret <32 x i16> %shift } define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatconstant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v64i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll index 2c178e6..04d44d5 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll @@ -5,12 +5,12 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42 @@ -175,19 +175,19 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' @@ -199,7 +199,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' @@ -217,7 +217,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -230,37 +230,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' @@ -272,7 +272,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -322,49 +322,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -377,49 +377,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -469,49 +469,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 @@ -524,49 +524,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 @@ -579,31 +585,37 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatvar_shift_v16i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -616,25 +628,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -646,19 +658,19 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v32i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -671,25 +683,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -701,19 +713,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -848,18 +866,74 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { } define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v4i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, ret <4 x i64> %shift } define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, ret <8 x i64> %shift @@ -875,18 +949,74 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { } define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, ret <8 x i32> %shift } define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, ret <16 x i32> %shift @@ -902,45 +1032,189 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { } define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, ret <16 x i16> %shift } define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, ret <32 x i16> %shift } define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatconstant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v64i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll index 5ce2393..1adad5e 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll @@ -506,13 +506,13 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' @@ -543,37 +543,37 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' @@ -585,7 +585,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> poison, i64 %b, i32 0 @@ -598,37 +598,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' @@ -640,7 +640,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> poison, i64 %b, i32 0 @@ -653,13 +653,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' @@ -690,49 +690,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> poison, i32 %b, i32 0 @@ -745,49 +745,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> poison, i32 %b, i32 0 @@ -800,13 +800,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX-LABEL: 'splatvar_shift_v8i16' @@ -837,49 +837,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> poison, i16 %b, i32 0 @@ -892,49 +892,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> poison, i16 %b, i32 0 @@ -947,61 +965,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512F-LABEL: 'splatvar_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BW-LABEL: 'splatvar_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512VL-LABEL: 'splatvar_shift_v16i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> poison, i8 %b, i32 0 @@ -1014,25 +1020,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -1044,37 +1050,19 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512F-LABEL: 'splatvar_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512BW-LABEL: 'splatvar_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512VL-LABEL: 'splatvar_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512-LABEL: 'splatvar_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %shift = lshr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> poison, i8 %b, i32 0 @@ -1087,25 +1075,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1117,7 +1105,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v64i8' @@ -1129,7 +1117,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' @@ -1141,13 +1129,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %shift = lshr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> poison, i8 %b, i32 0 @@ -1651,19 +1639,19 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' @@ -1671,7 +1659,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1684,19 +1672,19 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' @@ -1704,7 +1692,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1726,19 +1714,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' @@ -1746,7 +1734,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1759,19 +1747,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' @@ -1779,7 +1767,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1801,27 +1789,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v16i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1834,23 +1834,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' @@ -1858,7 +1858,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' @@ -1866,7 +1866,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1875,32 +1875,40 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatconstant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; +; BTVER2-LABEL: 'splatconstant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; %shift = lshr <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' @@ -1915,12 +1923,24 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatconstant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1929,11 +1949,11 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' @@ -1949,23 +1969,23 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index 7c545bb..a07d505 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -506,13 +506,13 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' @@ -543,37 +543,37 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' @@ -585,7 +585,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -598,37 +598,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' @@ -640,7 +640,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -653,13 +653,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' @@ -690,49 +690,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -745,49 +745,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -800,13 +800,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX-LABEL: 'splatvar_shift_v8i16' @@ -837,49 +837,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 @@ -892,49 +892,67 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 @@ -947,61 +965,49 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512F-LABEL: 'splatvar_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BW-LABEL: 'splatvar_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512VL-LABEL: 'splatvar_shift_v16i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -1014,25 +1020,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -1044,37 +1050,19 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512F-LABEL: 'splatvar_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512BW-LABEL: 'splatvar_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512VL-LABEL: 'splatvar_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512-LABEL: 'splatvar_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %shift = lshr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -1087,25 +1075,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1117,7 +1105,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatvar_shift_v64i8' @@ -1129,7 +1117,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' @@ -1141,13 +1129,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %shift = lshr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -1651,19 +1639,19 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' @@ -1671,7 +1659,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1684,19 +1672,19 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' @@ -1704,7 +1692,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1726,19 +1714,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' @@ -1746,7 +1734,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1759,19 +1747,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' @@ -1779,7 +1767,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1801,27 +1789,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v16i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1834,23 +1834,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' @@ -1858,7 +1858,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' @@ -1866,7 +1866,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1875,32 +1875,40 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatconstant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; +; BTVER2-LABEL: 'splatconstant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; %shift = lshr <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' @@ -1915,12 +1923,24 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatconstant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1929,11 +1949,11 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' @@ -1949,23 +1969,23 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll index 7cacda16..6e73f62 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll @@ -1,20 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOPAVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOPAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX1 ; Verify the cost of vector logical shift right instructions. @@ -138,31 +138,43 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; -; AVX-LABEL: 'splatvar_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; XOP-LABEL: 'splatvar_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; AVX1-LABEL: 'splatvar_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <2 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX2-LABEL: 'splatvar_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v2i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <2 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v2i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v2i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -175,51 +187,45 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift -; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer %shift = lshr <4 x i64> %a, %splat @@ -230,51 +236,45 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift -; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer %shift = lshr <8 x i64> %a, %splat @@ -285,31 +285,43 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; -; AVX-LABEL: 'splatvar_shift_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; -; XOP-LABEL: 'splatvar_shift_v4i32' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; AVX1-LABEL: 'splatvar_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'splatvar_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v4i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v4i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 @@ -322,51 +334,45 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift -; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer %shift = lshr <8 x i32> %a, %splat @@ -377,51 +383,45 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift -; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer %shift = lshr <16 x i32> %a, %splat @@ -432,31 +432,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'splatvar_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift -; -; XOP-LABEL: 'splatvar_shift_v8i16' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'splatvar_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'splatvar_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v8i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v8i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %insert = insertelement <8 x i16> undef, i16 %b, i32 0 @@ -469,51 +481,45 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift -; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer %shift = lshr <16 x i16> %a, %splat @@ -524,50 +530,50 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer @@ -579,32 +585,50 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatvar_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift -; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift -; -; AVX512-LABEL: 'splatvar_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatvar_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512F-LABEL: 'splatvar_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer @@ -616,25 +640,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -646,21 +670,15 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v32i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift -; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer %shift = lshr <32 x i8> %a, %splat @@ -671,25 +689,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -701,20 +719,20 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer @@ -839,108 +857,412 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v2i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v2i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v2i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v2i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, ret <2 x i64> %shift } define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v4i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, ret <4 x i64> %shift } define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, ret <8 x i64> %shift } define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, ret <4 x i32> %shift } define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, ret <8 x i32> %shift } define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, ret <16 x i32> %shift } define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, ret <8 x i16> %shift } define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, ret <16 x i16> %shift } define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, ret <32 x i16> %shift } define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v64i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll index 20e4458..c92d537 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll @@ -5,12 +5,12 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42 @@ -138,31 +138,31 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatvar_shift_v2i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v2i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -175,49 +175,49 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -230,49 +230,49 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -285,31 +285,31 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOP-LABEL: 'splatvar_shift_v4i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 @@ -322,49 +322,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -377,49 +377,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -432,31 +432,31 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX-LABEL: 'splatvar_shift_v8i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'splatvar_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %insert = insertelement <8 x i16> undef, i16 %b, i32 0 @@ -469,49 +469,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 @@ -524,49 +524,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 @@ -579,31 +585,37 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatvar_shift_v16i8' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -616,25 +628,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -646,19 +658,19 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v32i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -671,25 +683,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = lshr <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -701,19 +713,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = lshr <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -848,18 +866,74 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { } define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v4i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, ret <4 x i64> %shift } define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, ret <8 x i64> %shift @@ -875,18 +949,74 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { } define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, ret <8 x i32> %shift } define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, ret <16 x i32> %shift @@ -902,45 +1032,193 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { } define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, ret <16 x i16> %shift } define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, ret <32 x i16> %shift } define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatconstant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v64i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll index 8c1904b..e21a384 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll @@ -5,12 +5,12 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42 @@ -175,19 +175,19 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' @@ -199,7 +199,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' @@ -217,7 +217,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -230,37 +230,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' @@ -272,7 +272,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -322,49 +322,49 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -377,49 +377,49 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -469,49 +469,49 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 @@ -524,49 +524,55 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 @@ -579,33 +585,51 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatvar_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift -; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatvar_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; +; BTVER2-LABEL: 'splatvar_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer %shift = shl <16 x i8> %a, %splat @@ -616,25 +640,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -646,19 +670,19 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v32i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -671,25 +695,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -701,19 +725,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -976,18 +1006,74 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { } define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v4i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, ret <4 x i64> %shift } define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, ret <8 x i64> %shift @@ -1003,18 +1089,74 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { } define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, ret <8 x i32> %shift } define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, ret <16 x i32> %shift @@ -1030,9 +1172,41 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { } define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, ret <16 x i16> %shift @@ -1040,15 +1214,15 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' @@ -1056,19 +1230,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1076,27 +1254,107 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { } define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatconstant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v64i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll index da1019b..b9dfbc0 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll @@ -538,13 +538,13 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' @@ -568,7 +568,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SLM-LABEL: 'splatvar_shift_v2i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> poison, i64 %b, i32 0 @@ -581,37 +581,37 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' @@ -623,13 +623,13 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SLM-LABEL: 'splatvar_shift_v4i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> poison, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> poison, i64 %b, i32 0 @@ -642,37 +642,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' @@ -684,13 +684,13 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SLM-LABEL: 'splatvar_shift_v8i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> poison, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> poison, i64 %b, i32 0 @@ -703,13 +703,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' @@ -733,7 +733,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SLM-LABEL: 'splatvar_shift_v4i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> poison, i32 %b, i32 0 @@ -746,55 +746,55 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; SLM-LABEL: 'splatvar_shift_v8i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> poison, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> poison, i32 %b, i32 0 @@ -807,55 +807,55 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> poison, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> poison, i32 %b, i32 0 @@ -868,13 +868,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX-LABEL: 'splatvar_shift_v8i16' @@ -898,7 +898,7 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SLM-LABEL: 'splatvar_shift_v8i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %insert = insertelement <8 x i16> poison, i16 %b, i32 0 @@ -911,55 +911,55 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> poison, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> poison, i16 %b, i32 0 @@ -972,55 +972,73 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v32i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> poison, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> poison, i16 %b, i32 0 @@ -1033,67 +1051,55 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512F-LABEL: 'splatvar_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BW-LABEL: 'splatvar_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512VL-LABEL: 'splatvar_shift_v16i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> poison, i8 %b, i32 0 @@ -1106,25 +1112,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -1139,40 +1145,22 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512F-LABEL: 'splatvar_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512BW-LABEL: 'splatvar_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512VL-LABEL: 'splatvar_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512-LABEL: 'splatvar_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v32i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> poison, i8 %b, i32 0 @@ -1185,25 +1173,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1227,7 +1215,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' @@ -1239,19 +1227,19 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v64i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = shl <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> poison, i8 %b, i32 0 @@ -1771,19 +1759,19 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' @@ -1791,7 +1779,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1804,19 +1792,19 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' @@ -1824,7 +1812,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1846,19 +1834,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' @@ -1866,7 +1854,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1879,19 +1867,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' @@ -1899,7 +1887,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1921,27 +1909,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v16i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1954,23 +1954,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' @@ -1978,7 +1978,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' @@ -1986,7 +1986,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1995,32 +1995,40 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatconstant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; +; BTVER2-LABEL: 'splatconstant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; %shift = shl <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' @@ -2035,12 +2043,24 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatconstant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -2049,11 +2069,11 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' @@ -2069,23 +2089,23 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll index 98202fd..5c72d51 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -538,13 +538,13 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' @@ -568,7 +568,7 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SLM-LABEL: 'splatvar_shift_v2i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -581,37 +581,37 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' @@ -623,13 +623,13 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SLM-LABEL: 'splatvar_shift_v4i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -642,37 +642,37 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' @@ -684,13 +684,13 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SLM-LABEL: 'splatvar_shift_v8i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -703,13 +703,13 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' @@ -733,7 +733,7 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SLM-LABEL: 'splatvar_shift_v4i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 @@ -746,55 +746,55 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; SLM-LABEL: 'splatvar_shift_v8i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -807,55 +807,55 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -868,13 +868,13 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX-LABEL: 'splatvar_shift_v8i16' @@ -898,7 +898,7 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SLM-LABEL: 'splatvar_shift_v8i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %insert = insertelement <8 x i16> undef, i16 %b, i32 0 @@ -911,55 +911,55 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 @@ -972,55 +972,73 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v32i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 @@ -1033,67 +1051,55 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512F-LABEL: 'splatvar_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BW-LABEL: 'splatvar_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512VL-LABEL: 'splatvar_shift_v16i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift -; -; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -1106,25 +1112,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -1139,40 +1145,22 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512F-LABEL: 'splatvar_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512BW-LABEL: 'splatvar_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512VL-LABEL: 'splatvar_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift -; -; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512-LABEL: 'splatvar_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v32i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -1185,25 +1173,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1227,7 +1215,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' @@ -1239,19 +1227,19 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v64i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = shl <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -1771,19 +1759,19 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' @@ -1791,7 +1779,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1804,19 +1792,19 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' @@ -1824,7 +1812,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1846,19 +1834,19 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' @@ -1866,7 +1854,7 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1879,19 +1867,19 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' @@ -1899,7 +1887,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1921,27 +1909,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v16i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1954,23 +1954,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' @@ -1978,7 +1978,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' @@ -1986,7 +1986,7 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1995,32 +1995,40 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatconstant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; +; BTVER2-LABEL: 'splatconstant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; %shift = shl <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' @@ -2035,12 +2043,24 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatconstant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -2049,11 +2069,11 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' @@ -2069,23 +2089,23 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll index 3d3cd00..185c416 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll @@ -1,20 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOPAVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOPAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX1 ; Verify the cost of vector logical shift right instructions. @@ -138,31 +138,43 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; -; AVX-LABEL: 'splatvar_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift -; -; XOP-LABEL: 'splatvar_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; AVX1-LABEL: 'splatvar_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <2 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX2-LABEL: 'splatvar_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v2i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <2 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v2i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v2i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -175,51 +187,45 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift -; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer %shift = shl <4 x i64> %a, %splat @@ -230,51 +236,45 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift -; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer %shift = shl <8 x i64> %a, %splat @@ -285,31 +285,43 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; -; AVX-LABEL: 'splatvar_shift_v4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; AVX1-LABEL: 'splatvar_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'splatvar_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; -; XOP-LABEL: 'splatvar_shift_v4i32' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; XOPAVX1-LABEL: 'splatvar_shift_v4i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v4i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 @@ -322,51 +334,45 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift -; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer %shift = shl <8 x i32> %a, %splat @@ -377,51 +383,45 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift -; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer %shift = shl <16 x i32> %a, %splat @@ -432,31 +432,43 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'splatvar_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift -; -; XOP-LABEL: 'splatvar_shift_v8i16' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'splatvar_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'splatvar_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v8i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v8i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %insert = insertelement <8 x i16> undef, i16 %b, i32 0 @@ -469,51 +481,45 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift -; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer %shift = shl <16 x i16> %a, %splat @@ -524,50 +530,50 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer @@ -579,31 +585,43 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatvar_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift -; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatvar_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -616,25 +634,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -646,21 +664,15 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v32i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift -; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer %shift = shl <32 x i8> %a, %splat @@ -671,25 +683,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -701,20 +713,20 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; -; BTVER2-LABEL: 'splatvar_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer @@ -782,10 +794,6 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <4 x i32> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; -; BTVER2-LABEL: 'constant_shift_v4i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i32> %a, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift -; %shift = shl <4 x i32> %a, ret <4 x i32> %shift } @@ -819,10 +827,6 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i32> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; -; BTVER2-LABEL: 'constant_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i32> %a, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift -; %shift = shl <8 x i32> %a, ret <8 x i32> %shift } @@ -856,10 +860,6 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; -; BTVER2-LABEL: 'constant_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <16 x i32> %a, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift -; %shift = shl <16 x i32> %a, ret <16 x i32> %shift } @@ -902,10 +902,6 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; -; BTVER2-LABEL: 'constant_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i16> %a, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift -; %shift = shl <16 x i16> %a, ret <16 x i16> %shift } @@ -935,9 +931,13 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; BTVER2-LABEL: 'constant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <32 x i16> %a, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'constant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'constant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, ret <32 x i16> %shift @@ -975,63 +975,231 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v2i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v2i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v2i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v2i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, ret <2 x i64> %shift } define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v4i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, ret <4 x i64> %shift } define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, ret <8 x i64> %shift } define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, ret <4 x i32> %shift } define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, ret <8 x i32> %shift } define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, ret <16 x i32> %shift } define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; SSE2-LABEL: 'splatconstant_shift_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, ret <8 x i16> %shift @@ -1039,36 +1207,36 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; -; BTVER2-LABEL: 'splatconstant_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, ret <16 x i16> %shift @@ -1076,59 +1244,143 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; BTVER2-LABEL: 'splatconstant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, ret <32 x i16> %shift } define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v64i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; SSE2-LABEL: 'splatconstant_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'splatconstant_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll index ff6c245..a8ef264 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll @@ -5,12 +5,12 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SLM ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 @@ -138,37 +138,37 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v2i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v2i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatvar_shift_v2i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatvar_shift_v2i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v2i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SLM-LABEL: 'splatvar_shift_v2i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %insert = insertelement <2 x i64> undef, i64 %b, i32 0 @@ -181,55 +181,55 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v4i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v4i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v4i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v4i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SLM-LABEL: 'splatvar_shift_v4i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %insert = insertelement <4 x i64> undef, i64 %b, i32 0 @@ -242,55 +242,55 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i64' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i64' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SLM-LABEL: 'splatvar_shift_v8i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %insert = insertelement <8 x i64> undef, i64 %b, i32 0 @@ -303,37 +303,37 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v4i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v4i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX-LABEL: 'splatvar_shift_v4i32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOP-LABEL: 'splatvar_shift_v4i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v4i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; SLM-LABEL: 'splatvar_shift_v4i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %insert = insertelement <4 x i32> undef, i32 %b, i32 0 @@ -346,55 +346,55 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v8i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v8i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v8i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <8 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v8i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SLM-LABEL: 'splatvar_shift_v8i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %insert = insertelement <8 x i32> undef, i32 %b, i32 0 @@ -407,55 +407,55 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i32' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i32' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <16 x i32> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i32' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i32> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %insert = insertelement <16 x i32> undef, i32 %b, i32 0 @@ -468,37 +468,37 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v8i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v8i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX-LABEL: 'splatvar_shift_v8i16' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'splatvar_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v8i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v8i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> undef, i16 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %insert = insertelement <8 x i16> undef, i16 %b, i32 0 @@ -511,55 +511,55 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v16i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v16i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v16i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v16i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i16' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %insert = insertelement <16 x i16> undef, i16 %b, i32 0 @@ -572,55 +572,61 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i16' ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %splat ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatvar_shift_v32i16' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SLM-LABEL: 'splatvar_shift_v32i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %insert = insertelement <32 x i16> undef, i16 %b, i32 0 @@ -633,39 +639,57 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v16i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v16i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatvar_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift -; -; XOP-LABEL: 'splatvar_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatvar_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v16i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v16i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = shl <16 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; +; BTVER2-LABEL: 'splatvar_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer %shift = shl <16 x i8> %a, %splat @@ -676,25 +700,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v32i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v32i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v32i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -706,25 +730,25 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v32i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512-LABEL: 'splatvar_shift_v32i8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v32i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = shl <32 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -737,25 +761,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; SSE2-LABEL: 'splatvar_shift_v64i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatvar_shift_v64i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatvar_shift_v64i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -767,25 +791,31 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; XOPAVX2-LABEL: 'splatvar_shift_v64i8' ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SLM-LABEL: 'splatvar_shift_v64i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = shl <64 x i8> %a, %splat ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -1064,18 +1094,66 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { } define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; SSE-LABEL: 'splatconstant_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, ret <4 x i64> %shift } define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; SSE-LABEL: 'splatconstant_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, ret <8 x i64> %shift @@ -1091,18 +1169,66 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { } define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v8i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; SSE-LABEL: 'splatconstant_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, ret <8 x i32> %shift } define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; SSE-LABEL: 'splatconstant_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, ret <16 x i32> %shift @@ -1118,9 +1244,37 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { } define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; SSE-LABEL: 'splatconstant_shift_v16i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, ret <16 x i16> %shift @@ -1128,31 +1282,35 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; -; AVX512-LABEL: 'splatconstant_shift_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'splatconstant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1160,27 +1318,99 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { } define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v16i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; SSE-LABEL: 'splatconstant_shift_v16i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatconstant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, ret <16 x i8> %shift } define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v32i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; SSE-LABEL: 'splatconstant_shift_v32i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, ret <32 x i8> %shift } define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { -; CHECK-LABEL: 'splatconstant_shift_v64i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; SSE-LABEL: 'splatconstant_shift_v64i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, ret <64 x i8> %shift diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll index 79426a9..5af7afc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll @@ -3,8 +3,8 @@ ; CHECK: 'foo' ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %shift = ashr i32 %val, %k -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %shift = ashr i32 %val, %k -; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %shift = ashr i32 %val, %k +; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %shift = ashr i32 %val, %k +; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %shift = ashr i32 %val, %k define void @foo(i32* nocapture %p, i32 %k) local_unnamed_addr #0 { entry: br label %body -- 2.7.4