From 6ac1e98b09ab38adde003620cc77172196b2ef62 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Oct 2016 16:49:04 +0000 Subject: [PATCH] [X86][SSE] Add SSE41/AVX1 costs for vector shifts. We were defaulting to SSE2 costs which weren't taking into account the availability of PBLENDW/PBLENDVB to improve merging of per-element shift results. llvm-svn: 284939 --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 26 +++++++ .../Analysis/CostModel/X86/vshift-ashr-cost.ll | 84 +++++++++++----------- .../Analysis/CostModel/X86/vshift-lshr-cost.ll | 84 +++++++++++----------- .../test/Analysis/CostModel/X86/vshift-shl-cost.ll | 50 ++++++------- 4 files changed, 135 insertions(+), 109 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 7f6dc2b..a2cc73a 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -400,6 +400,32 @@ int X86TTIImpl::getArithmeticInstrCost( ISD = ISD::MUL; } + static const CostTblEntry SSE41CostTable[] = { + { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence. + { ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence. + { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence. + + { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence. + { ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence. + { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SRL, MVT::v16i16, 2*14 }, // pblendvb sequence. + { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend. + { ISD::SRL, MVT::v8i32, 2*11 }, // Shift each lane + blend. + + { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence. + { ISD::SRA, MVT::v32i8, 2*24 }, // pblendvb sequence. + { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence. + { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend. + { ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend. + }; + + if (ST->hasSSE41()) { + if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + static const CostTblEntry SSE2CostTable[] = { // We don't correctly identify costs of casts because they are marked as // custom. diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index a0d07d7..e53e40b 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -36,8 +36,8 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift -; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 12 for instruction: %shift +; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -48,8 +48,8 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -60,9 +60,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift -; AVX2: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 14 for instruction: %shift +; AVX: Found an estimated cost of 14 for instruction: %shift +; AVX2: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <8 x i16> %a, %b ret <8 x i16> %shift @@ -71,8 +71,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift -; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <16 x i16> %a, %b @@ -82,9 +82,9 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8': ; SSE2: Found an estimated cost of 54 for instruction: %shift -; SSE41: Found an estimated cost of 54 for instruction: %shift -; AVX: Found an estimated cost of 54 for instruction: %shift -; AVX2: Found an estimated cost of 54 for instruction: %shift +; SSE41: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX2: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <16 x i8> %a, %b ret <16 x i8> %shift @@ -93,8 +93,8 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift -; SSE41: Found an estimated cost of 108 for instruction: %shift -; AVX: Found an estimated cost of 108 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, %b @@ -132,8 +132,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift -; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 12 for instruction: %shift +; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -145,8 +145,8 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -158,9 +158,9 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift -; AVX2: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 14 for instruction: %shift +; AVX: Found an estimated cost of 14 for instruction: %shift +; AVX2: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer %shift = ashr <8 x i16> %a, %splat @@ -170,8 +170,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift -; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer @@ -182,9 +182,9 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8': ; SSE2: Found an estimated cost of 54 for instruction: %shift -; SSE41: Found an estimated cost of 54 for instruction: %shift -; AVX: Found an estimated cost of 54 for instruction: %shift -; AVX2: Found an estimated cost of 54 for instruction: %shift +; SSE41: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX2: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer %shift = ashr <16 x i8> %a, %splat @@ -194,8 +194,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift -; SSE41: Found an estimated cost of 108 for instruction: %shift -; AVX: Found an estimated cost of 108 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer @@ -232,8 +232,8 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift -; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 12 for instruction: %shift +; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -244,8 +244,8 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -256,9 +256,9 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift -; AVX2: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 14 for instruction: %shift +; AVX: Found an estimated cost of 14 for instruction: %shift +; AVX2: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <8 x i16> %a, ret <8 x i16> %shift @@ -267,8 +267,8 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift -; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <16 x i16> %a, @@ -278,9 +278,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8': ; SSE2: Found an estimated cost of 54 for instruction: %shift -; SSE41: Found an estimated cost of 54 for instruction: %shift -; AVX: Found an estimated cost of 54 for instruction: %shift -; AVX2: Found an estimated cost of 54 for instruction: %shift +; SSE41: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX2: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <16 x i8> %a, ret <16 x i8> %shift @@ -289,8 +289,8 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift -; SSE41: Found an estimated cost of 108 for instruction: %shift -; AVX: Found an estimated cost of 108 for instruction: %shift +; SSE41: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index a686b43..6d02826 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -38,8 +38,8 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift -; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 11 for instruction: %shift +; AVX: Found an estimated cost of 11 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -50,8 +50,8 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -62,9 +62,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift -; AVX2: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 14 for instruction: %shift +; AVX: Found an estimated cost of 14 for instruction: %shift +; AVX2: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <8 x i16> %a, %b ret <8 x i16> %shift @@ -73,8 +73,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift -; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <16 x i16> %a, %b @@ -84,9 +84,9 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift -; SSE41: Found an estimated cost of 26 for instruction: %shift -; AVX: Found an estimated cost of 26 for instruction: %shift -; AVX2: Found an estimated cost of 26 for instruction: %shift +; SSE41: Found an estimated cost of 12 for instruction: %shift +; AVX: Found an estimated cost of 12 for instruction: %shift +; AVX2: Found an estimated cost of 12 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <16 x i8> %a, %b ret <16 x i8> %shift @@ -95,8 +95,8 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift -; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 52 for instruction: %shift +; SSE41: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <32 x i8> %a, %b @@ -136,8 +136,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift -; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 11 for instruction: %shift +; AVX: Found an estimated cost of 11 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -149,8 +149,8 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -162,9 +162,9 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift -; AVX2: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 14 for instruction: %shift +; AVX: Found an estimated cost of 14 for instruction: %shift +; AVX2: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer %shift = lshr <8 x i16> %a, %splat @@ -174,8 +174,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift -; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer @@ -186,9 +186,9 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift -; SSE41: Found an estimated cost of 26 for instruction: %shift -; AVX: Found an estimated cost of 26 for instruction: %shift -; AVX2: Found an estimated cost of 26 for instruction: %shift +; SSE41: Found an estimated cost of 12 for instruction: %shift +; AVX: Found an estimated cost of 12 for instruction: %shift +; AVX2: Found an estimated cost of 12 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer %shift = lshr <16 x i8> %a, %splat @@ -198,8 +198,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift -; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 52 for instruction: %shift +; SSE41: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer @@ -238,8 +238,8 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32': ; SSE2: Found an estimated cost of 16 for instruction: %shift -; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; SSE41: Found an estimated cost of 11 for instruction: %shift +; AVX: Found an estimated cost of 11 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 2 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -250,8 +250,8 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift @@ -262,9 +262,9 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift -; AVX2: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 14 for instruction: %shift +; AVX: Found an estimated cost of 14 for instruction: %shift +; AVX2: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <8 x i16> %a, ret <8 x i16> %shift @@ -273,8 +273,8 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift -; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <16 x i16> %a, @@ -284,9 +284,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift -; SSE41: Found an estimated cost of 26 for instruction: %shift -; AVX: Found an estimated cost of 26 for instruction: %shift -; AVX2: Found an estimated cost of 26 for instruction: %shift +; SSE41: Found an estimated cost of 12 for instruction: %shift +; AVX: Found an estimated cost of 12 for instruction: %shift +; AVX2: Found an estimated cost of 12 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = lshr <16 x i8> %a, ret <16 x i8> %shift @@ -295,8 +295,8 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift -; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 52 for instruction: %shift +; SSE41: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 4 for instruction: %shift %shift = lshr <32 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll index 85ca5a5..60ba3ad 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -63,9 +63,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift -; AVX2: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 14 for instruction: %shift +; AVX: Found an estimated cost of 14 for instruction: %shift +; AVX2: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %shift = shl <8 x i16> %a, %b ret <8 x i16> %shift @@ -74,8 +74,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift -; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <16 x i16> %a, %b @@ -85,9 +85,9 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift -; SSE41: Found an estimated cost of 26 for instruction: %shift -; AVX: Found an estimated cost of 26 for instruction: %shift -; AVX2: Found an estimated cost of 26 for instruction: %shift +; SSE41: Found an estimated cost of 11 for instruction: %shift +; AVX: Found an estimated cost of 11 for instruction: %shift +; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %shift = shl <16 x i8> %a, %b ret <16 x i8> %shift @@ -96,8 +96,8 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift -; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 52 for instruction: %shift +; SSE41: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i8> %a, %b @@ -163,9 +163,9 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16': ; SSE2: Found an estimated cost of 32 for instruction: %shift -; SSE41: Found an estimated cost of 32 for instruction: %shift -; AVX: Found an estimated cost of 32 for instruction: %shift -; AVX2: Found an estimated cost of 32 for instruction: %shift +; SSE41: Found an estimated cost of 14 for instruction: %shift +; AVX: Found an estimated cost of 14 for instruction: %shift +; AVX2: Found an estimated cost of 14 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer %shift = shl <8 x i16> %a, %splat @@ -175,8 +175,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift -; SSE41: Found an estimated cost of 64 for instruction: %shift -; AVX: Found an estimated cost of 64 for instruction: %shift +; SSE41: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 28 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer @@ -187,9 +187,9 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift -; SSE41: Found an estimated cost of 26 for instruction: %shift -; AVX: Found an estimated cost of 26 for instruction: %shift -; AVX2: Found an estimated cost of 26 for instruction: %shift +; SSE41: Found an estimated cost of 11 for instruction: %shift +; AVX: Found an estimated cost of 11 for instruction: %shift +; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer %shift = shl <16 x i8> %a, %splat @@ -199,8 +199,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift -; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 52 for instruction: %shift +; SSE41: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer @@ -286,9 +286,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8': ; SSE2: Found an estimated cost of 26 for instruction: %shift -; SSE41: Found an estimated cost of 26 for instruction: %shift -; AVX: Found an estimated cost of 26 for instruction: %shift -; AVX2: Found an estimated cost of 26 for instruction: %shift +; SSE41: Found an estimated cost of 11 for instruction: %shift +; AVX: Found an estimated cost of 11 for instruction: %shift +; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 1 for instruction: %shift %shift = shl <16 x i8> %a, ret <16 x i8> %shift @@ -297,8 +297,8 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift -; SSE41: Found an estimated cost of 52 for instruction: %shift -; AVX: Found an estimated cost of 52 for instruction: %shift +; SSE41: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 22 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; XOP: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i8> %a, -- 2.7.4