We were defaulting to SSE2 costs which weren't taking into account the availability of PBLENDW/PBLENDVB to improve merging of per-element shift results.
llvm-svn: 284939
ISD = ISD::MUL;
}
+ static const CostTblEntry SSE41CostTable[] = {
+ { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence.
+ { ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence.
+ { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence.
+
+ { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v16i16, 2*14 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend.
+ { ISD::SRL, MVT::v8i32, 2*11 }, // Shift each lane + blend.
+
+ { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v32i8, 2*24 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend.
+ { ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend.
+ };
+
+ if (ST->hasSSE41()) {
+ if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
static const CostTblEntry SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, %b
ret <8 x i16> %shift
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, %b
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
-; SSE41: Found an estimated cost of 54 for instruction: %shift
-; AVX: Found an estimated cost of 54 for instruction: %shift
-; AVX2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, %b
ret <16 x i8> %shift
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction: %shift
-; SSE41: Found an estimated cost of 108 for instruction: %shift
-; AVX: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, %b
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i16> %a, %splat
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
-; SSE41: Found an estimated cost of 54 for instruction: %shift
-; AVX: Found an estimated cost of 54 for instruction: %shift
-; AVX2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i8> %a, %splat
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction: %shift
-; SSE41: Found an estimated cost of 108 for instruction: %shift
-; AVX: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
-; SSE41: Found an estimated cost of 54 for instruction: %shift
-; AVX: Found an estimated cost of 54 for instruction: %shift
-; AVX2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction: %shift
-; SSE41: Found an estimated cost of 108 for instruction: %shift
-; AVX: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, %b
ret <8 x i16> %shift
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, %b
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, %b
ret <16 x i8> %shift
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, %b
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i16> %a, %splat
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i8> %a, %splat
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i16> %a, %b
ret <8 x i16> %shift
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <16 x i16> %a, %b
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, %b
ret <16 x i8> %shift
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, %b
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i16> %a, %splat
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i8> %a, %splat
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>