From 3036547248966a7cfc376eed41a0d2d1e9014348 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 24 Dec 2020 12:14:19 +0900 Subject: [PATCH] Precommit analysis/etc tests for inselt poison placeholder This adds tests in directories missing from https://reviews.llvm.org/rGdb7a2f347f132b3920415013d62d1adfb18d8d58 --- .../CostModel/AArch64/kryo-inseltpoison.ll | 26 + .../PowerPC/insert_extract-inseltpoison.ll | 187 ++ .../CostModel/SystemZ/vectorinstrs-inseltpoison.ll | 56 + .../X86/insert-extract-at-zero-inseltpoison.ll | 40 + .../Analysis/CostModel/X86/loop_v2-inseltpoison.ll | 39 + .../X86/masked-intrinsic-cost-inseltpoison.ll | 1911 +++++++++++++++++ .../CostModel/X86/uniformshift-inseltpoison.ll | 39 + .../CostModel/X86/vector-insert-inseltpoison.ll | 1270 +++++++++++ .../CostModel/X86/vector_gep-inseltpoison.ll | 17 + .../CostModel/X86/vshift-ashr-cost-inseltpoison.ll | 1843 ++++++++++++++++ .../CostModel/X86/vshift-lshr-cost-inseltpoison.ll | 1867 +++++++++++++++++ .../CostModel/X86/vshift-shl-cost-inseltpoison.ll | 2197 ++++++++++++++++++++ .../Analysis/DemandedBits/vectors-inseltpoison.ll | 136 ++ .../X86/stack-value-piece-inseltpoison.ll | 114 + llvm/test/Other/scalable-vectors-core-ir.ll | 12 +- 15 files changed, 9753 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Analysis/CostModel/AArch64/kryo-inseltpoison.ll create mode 100644 llvm/test/Analysis/CostModel/PowerPC/insert_extract-inseltpoison.ll create mode 100644 llvm/test/Analysis/CostModel/SystemZ/vectorinstrs-inseltpoison.ll create mode 100644 llvm/test/Analysis/CostModel/X86/insert-extract-at-zero-inseltpoison.ll create mode 100644 llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll create mode 100644 llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll create mode 100644 llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll create mode 100644 llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll create mode 100644 
llvm/test/Analysis/CostModel/X86/vector_gep-inseltpoison.ll create mode 100644 llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll create mode 100644 llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll create mode 100644 llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll create mode 100644 llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll create mode 100644 llvm/test/DebugInfo/X86/stack-value-piece-inseltpoison.ll diff --git a/llvm/test/Analysis/CostModel/AArch64/kryo-inseltpoison.ll b/llvm/test/Analysis/CostModel/AArch64/kryo-inseltpoison.ll new file mode 100644 index 0000000..cb9f992 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/kryo-inseltpoison.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -cost-model -analyze -mcpu=kryo | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +; CHECK-LABEL: vectorInstrCost +define void @vectorInstrCost() { + + ; Vector extracts - extracting the first element should have a zero cost; + ; all other elements should have a cost of two. + ; + ; CHECK: cost of 0 {{.*}} extractelement <2 x i64> undef, i32 0 + ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 1 + %t1 = extractelement <2 x i64> undef, i32 0 + %t2 = extractelement <2 x i64> undef, i32 1 + + ; Vector inserts - inserting the first element should have a zero cost; all + ; other elements should have a cost of two. 
+ ; + ; CHECK: cost of 0 {{.*}} insertelement <2 x i64> poison, i64 undef, i32 0 + ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> poison, i64 undef, i32 1 + %t3 = insertelement <2 x i64> poison, i64 undef, i32 0 + %t4 = insertelement <2 x i64> poison, i64 undef, i32 1 + + ret void +} diff --git a/llvm/test/Analysis/CostModel/PowerPC/insert_extract-inseltpoison.ll b/llvm/test/Analysis/CostModel/PowerPC/insert_extract-inseltpoison.ll new file mode 100644 index 0000000..c2fc2cd --- /dev/null +++ b/llvm/test/Analysis/CostModel/PowerPC/insert_extract-inseltpoison.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck --check-prefix=CHECK-P7 %s +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck --check-prefix=CHECK-P8LE %s +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -mattr=+vsx | FileCheck --check-prefix=CHECK-P9BE %s +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -mattr=+vsx | FileCheck --check-prefix=CHECK-P9LE %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i32 @insert(i32 %arg) { + ; CHECK: cost of 10 {{.*}} insertelement +; CHECK-P7-LABEL: 'insert' +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x = insertelement <4 x i32> poison, i32 %arg, i32 0 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-P8LE-LABEL: 'insert' +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = insertelement <4 x i32> poison, i32 %arg, i32 0 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; 
CHECK-P9BE-LABEL: 'insert' +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = insertelement <4 x i32> poison, i32 %arg, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; CHECK-P9LE-LABEL: 'insert' +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = insertelement <4 x i32> poison, i32 %arg, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + %x = insertelement <4 x i32> poison, i32 %arg, i32 0 + ret i32 undef +} + +define i32 @extract(<4 x i32> %arg) { + ; CHECK: cost of 3 {{.*}} extractelement +; CHECK-P7-LABEL: 'extract' +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <4 x i32> %arg, i32 0 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x +; +; CHECK-P8LE-LABEL: 'extract' +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <4 x i32> %arg, i32 0 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x +; +; CHECK-P9BE-LABEL: 'extract' +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x +; +; CHECK-P9LE-LABEL: 'extract' +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x +; + %x = extractelement <4 x i32> %arg, i32 0 + ret i32 %x +} + +define void @test2xdouble(<2 x double> %arg1) { +; CHECK-P7-LABEL: 'test2xdouble' +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = extractelement <2 x double> %arg1, i32 0 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = extractelement 
<2 x double> %arg1, i32 1 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P8LE-LABEL: 'test2xdouble' +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = extractelement <2 x double> %arg1, i32 0 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = extractelement <2 x double> %arg1, i32 1 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P9BE-LABEL: 'test2xdouble' +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = extractelement <2 x double> %arg1, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = extractelement <2 x double> %arg1, i32 1 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P9LE-LABEL: 'test2xdouble' +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = extractelement <2 x double> %arg1, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = extractelement <2 x double> %arg1, i32 1 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v1 = extractelement <2 x double> %arg1, i32 0 + %v2 = extractelement <2 x double> %arg1, i32 1 + ret void +} + +define void @test4xi32(<4 x i32> %v1, i32 %x1) { +; CHECK-P7-LABEL: 'test4xi32' +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P8LE-LABEL: 'test4xi32' +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P9BE-LABEL: 'test4xi32' +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 
for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P9LE-LABEL: 'test4xi32' +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2 + ret void +} + +define void @vexti32(<4 x i32> %p1) { +; CHECK-P7-LABEL: 'vexti32' +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P8LE-LABEL: 'vexti32' +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P9BE-LABEL: 'vexti32' +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1 +; CHECK-P9BE-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P9LE-LABEL: 'vexti32' +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %i1 = extractelement <4 x i32> %p1, i32 0 + %i2 = extractelement <4 x i32> %p1, i32 1 + %i3 = extractelement <4 x i32> %p1, i32 2 + %i4 = extractelement <4 x i32> %p1, i32 3 + ret void +} + +define void @vexti64(<2 x i64> %p1) { +; CHECK-P7-LABEL: 'vexti64' +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <2 x i64> %p1, i32 0 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P8LE-LABEL: 'vexti64' +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <2 x i64> %p1, i32 0 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P9BE-LABEL: 'vexti64' +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1 = extractelement <2 x i64> 
%p1, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P9LE-LABEL: 'vexti64' +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <2 x i64> %p1, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %i1 = extractelement <2 x i64> %p1, i32 0 + %i2 = extractelement <2 x i64> %p1, i32 1 + ret void +} + +define void @vext(<8 x i16> %p1, <16 x i8> %p2) { +; CHECK-P7-LABEL: 'vext' +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0 +; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P8LE-LABEL: 'vext' +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0 +; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P9BE-LABEL: 'vext' +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0 +; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-P9LE-LABEL: 'vext' +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0 +; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %i1 = extractelement <8 x i16> %p1, i32 0 + %i2 = extractelement <16 x i8> %p2, i32 0 + ret void +} diff --git a/llvm/test/Analysis/CostModel/SystemZ/vectorinstrs-inseltpoison.ll b/llvm/test/Analysis/CostModel/SystemZ/vectorinstrs-inseltpoison.ll new file mode 100644 index 0000000..696550a --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/vectorinstrs-inseltpoison.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s + +; CHECK: vecinstrs +define void @vecinstrs() { + + ;; Extract element is penalized somewhat with a cost of 2 for index 0. + extractelement <16 x i8> undef, i32 0 + extractelement <16 x i8> undef, i32 1 + + extractelement <8 x i16> undef, i32 0 + extractelement <8 x i16> undef, i32 1 + + extractelement <4 x i32> undef, i32 0 + extractelement <4 x i32> undef, i32 1 + + extractelement <2 x i64> undef, i32 0 + extractelement <2 x i64> undef, i32 1 + + extractelement <2 x double> undef, i32 0 + extractelement <2 x double> undef, i32 1 + + ; Extraction of i1 means extract + test under mask before branch. + extractelement <2 x i1> undef, i32 0 + extractelement <4 x i1> undef, i32 1 + extractelement <8 x i1> undef, i32 2 + + ;; Insert element + insertelement <16 x i8> poison, i8 undef, i32 0 + insertelement <8 x i16> poison, i16 undef, i32 0 + insertelement <4 x i32> poison, i32 undef, i32 0 + + ; vlvgp will do two grs into a vector register: only add cost half of the time. 
+ insertelement <2 x i64> poison, i64 undef, i32 0 + insertelement <2 x i64> poison, i64 undef, i32 1 + + ret void + +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <16 x i8> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = extractelement <16 x i8> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %3 = extractelement <8 x i16> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = extractelement <8 x i16> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %5 = extractelement <4 x i32> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = extractelement <4 x i32> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %7 = extractelement <2 x i64> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = extractelement <2 x i64> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = extractelement <2 x double> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = extractelement <2 x double> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %11 = extractelement <2 x i1> undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %12 = extractelement <4 x i1> undef, i32 1 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %13 = extractelement <8 x i1> undef, i32 2 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %14 = insertelement <16 x i8> poison, i8 undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %15 = insertelement <8 x i16> poison, i16 undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %16 = insertelement <4 x i32> poison, i32 undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 1 for 
instruction: %17 = insertelement <2 x i64> poison, i64 undef, i32 0 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %18 = insertelement <2 x i64> poison, i64 undef, i32 1 +} diff --git a/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero-inseltpoison.ll new file mode 100644 index 0000000..0d5b07c --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/insert-extract-at-zero-inseltpoison.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) { + ;CHECK: cost of 0 {{.*}} extract + %A = extractelement <4 x float> undef, i32 0 + ;CHECK: cost of 1 {{.*}} extract + %B = extractelement <4 x i32> undef, i32 0 + ;CHECK: cost of 1 {{.*}} extract + %C = extractelement <4 x float> undef, i32 1 + + ;CHECK: cost of 0 {{.*}} extract + %D = extractelement <8 x float> undef, i32 0 + ;CHECK: cost of 1 {{.*}} extract + %E = extractelement <8 x float> undef, i32 1 + + ;CHECK: cost of 1 {{.*}} extract + %F = extractelement <8 x float> undef, i32 %arg + + ;CHECK: cost of 0 {{.*}} insert + %G = insertelement <4 x float> poison, float %fl, i32 0 + ;CHECK: cost of 1 {{.*}} insert + %H = insertelement <4 x float> poison, float %fl, i32 1 + ;CHECK: cost of 1 {{.*}} insert + %I = insertelement <4 x i32> poison, i32 %arg, i32 0 + + ;CHECK: cost of 0 {{.*}} insert + %J = insertelement <4 x double> poison, double undef, i32 0 + + ;CHECK: cost of 0 {{.*}} insert + %K = insertelement <8 x double> poison, double undef, i32 4 + ;CHECK: cost of 0 {{.*}} insert + %L = insertelement <16 x double> poison, double undef, i32 8 + ;CHECK: cost of 1 {{.*}} insert + %M = 
insertelement <16 x double> poison, double undef, i32 9 + ret i32 0 +} + diff --git a/llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll new file mode 100644 index 0000000..ef411b5 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/loop_v2-inseltpoison.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +define i32 @foo(i32* nocapture %A) nounwind uwtable readonly ssp { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %vec.phi = phi <2 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ] + %0 = getelementptr inbounds i32, i32* %A, i64 %index + %1 = bitcast i32* %0 to <2 x i32>* + %2 = load <2 x i32>, <2 x i32>* %1, align 4 + %3 = sext <2 x i32> %2 to <2 x i64> + ;CHECK: cost of 1 {{.*}} extract + %4 = extractelement <2 x i64> %3, i32 0 + %5 = getelementptr inbounds i32, i32* %A, i64 %4 + ;CHECK: cost of 1 {{.*}} extract + %6 = extractelement <2 x i64> %3, i32 1 + %7 = getelementptr inbounds i32, i32* %A, i64 %6 + %8 = load i32, i32* %5, align 4 + ;CHECK: cost of 1 {{.*}} insert + %9 = insertelement <2 x i32> poison, i32 %8, i32 0 + %10 = load i32, i32* %7, align 4 + ;CHECK: cost of 1 {{.*}} insert + %11 = insertelement <2 x i32> %9, i32 %10, i32 1 + %12 = add nsw <2 x i32> %11, %vec.phi + %index.next = add i64 %index, 2 + %13 = icmp eq i64 %index.next, 192 + br i1 %13, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + %14 = extractelement <2 x i32> %12, i32 0 + %15 = extractelement <2 x i32> %12, i32 1 + %16 = add i32 %14, %15 + ret i32 %16 +} diff --git 
a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll new file mode 100644 index 0000000..0f6f6e2 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -0,0 +1,1911 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -cost-model -analyze | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefixes=AVX,AVX2 +; +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -cost-model -analyze | FileCheck %s --check-prefixes=AVX,SKL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze | FileCheck %s --check-prefixes=AVX512,KNL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -cost-model -analyze | FileCheck %s --check-prefixes=AVX512,SKX + +define i32 @masked_load() { +; SSE2-LABEL: 'masked_load' +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> 
@llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> 
undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SSE42-LABEL: 
'masked_load' +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x 
i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_load' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> 
@llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an 
estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 292 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_load' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef) +; KNL-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x 
i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 308 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret 
i32 0 +; +; SKX-LABEL: 'masked_load' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 
x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef) + 
%V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_store() { +; SSE2-LABEL: 'masked_store' +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call 
void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_store' +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 28 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void 
@llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_store' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: 
Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void 
@llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_store' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found 
an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 168 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void 
@llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 352 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_store' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x 
i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; + call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> 
undef) + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef) + + call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) + + call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) + call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) + + call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) + call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) + + ret i32 0 +} + +define i32 @masked_gather() { +; SSE2-LABEL: 'masked_gather' +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
%V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x 
i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_gather' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x 
float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, 
<32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_gather' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> 
@llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 
x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_gather' +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> 
undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call 
<4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_gather' +; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an 
estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x 
i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_gather' +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> 
@llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> 
undef, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: 
%V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_gather' +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SKX-NEXT: Cost 
Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> 
@llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> 
undef) + %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> 
undef, i32 1, <8 x i1> undef, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_scatter() { +; SSE2-LABEL: 'masked_scatter' +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: call void 
@llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_scatter' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 
1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_scatter' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void 
@llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x 
i16*> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_scatter' +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call 
void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x 
i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 88 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 176 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_scatter' +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void 
@llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> 
undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; + call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) + + call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) + + call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) + call void 
@llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) + + call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) + call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) + call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) + + ret i32 0 +} + +define i32 @masked_expandload() { +; SSE2-LABEL: 'masked_expandload' +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> 
@llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_expandload' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> 
@llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> 
undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_expandload' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> 
@llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) 
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX512-LABEL: 'masked_expandload' +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 
x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef) +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call 
<8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) + %V16I16 = call <16 x i16> 
@llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_compressstore() { +; SSE2-LABEL: 'masked_compressstore' +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void 
@llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 311 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 155 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_compressstore' +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, 
<4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: 
Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 191 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_compressstore' +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void 
@llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 127 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 
x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 255 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 127 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX512-LABEL: 'masked_compressstore' +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost 
of 10 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 59 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 119 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call 
void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 239 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; + call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) + call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) + + call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) + + call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* 
undef, <2 x i1> undef) + call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) + + call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) + + call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) + call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) + + call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) + call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) + call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) + + ret i32 0 +} + +define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) { +; SSE2-LABEL: 'test1' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res +; +; SSE42-LABEL: 'test1' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> 
@llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res +; +; AVX-LABEL: 'test1' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res +; +; AVX512-LABEL: 'test1' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res +; + %mask = icmp eq <2 x i64> %trigger, zeroinitializer + %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst) + ret <2 x double> %res +} + +define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) { +; SSE2-LABEL: 'test2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; AVX-LABEL: 'test2' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; AVX512-LABEL: 'test2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; + %mask = icmp eq <4 x i32> %trigger, zeroinitializer + %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst) + ret <4 x i32> %res +} + +define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) { +; SSE2-LABEL: 'test3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'test3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void 
@llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'test3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'test3' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %mask = icmp eq <4 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask) + ret void +} + +define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) { +; SSE2-LABEL: 'test4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res +; +; SSE42-LABEL: 'test4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 
x float> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res +; +; AVX1-LABEL: 'test4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res +; +; AVX2-LABEL: 'test4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res +; +; SKL-LABEL: 'test4' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res +; +; AVX512-LABEL: 'test4' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res +; + %mask = icmp eq <8 x i32> %trigger, zeroinitializer + %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, 
<8 x float>%dst) + ret <8 x float> %res +} + +define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) { +; SSE2-LABEL: 'test5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'test5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'test5' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'test5' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask) + ret void +} + +define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x 
i32> %val) { +; SSE2-LABEL: 'test6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'test6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'test6' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'test6' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask) + ret void +} + +define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) { +; SSE2-LABEL: 'test7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp 
eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res +; +; SSE42-LABEL: 'test7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res +; +; AVX-LABEL: 'test7' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res +; +; AVX512-LABEL: 'test7' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst) + ret <2 x float> %res +} + +define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) { +; SSE2-LABEL: 'test8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res +; +; SSE42-LABEL: 'test8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res +; +; AVX-LABEL: 'test8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res +; +; AVX512-LABEL: 'test8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst) + ret <2 x i32> %res +} + +define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) { +; SSE2-LABEL: 'test_gather_2f64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 
8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res +; +; SSE42-LABEL: 'test_gather_2f64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res +; +; AVX1-LABEL: 'test_gather_2f64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res +; +; AVX2-LABEL: 'test_gather_2f64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res +; +; SKL-LABEL: 'test_gather_2f64' +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res +; +; AVX512-LABEL: 'test_gather_2f64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res +; + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) 
+ ret <2 x double> %res +} + +define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; KNL-LABEL: 'test_gather_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> 
%res +; +; SKX-LABEL: 'test_gather_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) + ret <4 x i32> %res +} + +define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32>
@llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; KNL-LABEL: 'test_gather_4i32_const_mask' +; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; +; SKX-LABEL: 'test_gather_4i32_const_mask' +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) + ret <4 x i32> %res +} + +define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) { +; SSE2-LABEL: 'test_gather_16f32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for
instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x 
float> %res +; +; AVX512-LABEL: 'test_gather_16f32_const_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> 
@llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; 
AVX512-LABEL: 'test_gather_16f32_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x 
float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_ra_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) { +; SSE2-LABEL: 'test_gather_16f32_const_mask2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 
'test_gather_16f32_const_mask2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_const_mask2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_const_mask2' +; AVX2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_const_mask2' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_const_mask2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert 
= insertelement <16 x float*> poison, float* %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res +; + %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0 + %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer + + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { +; SSE2-LABEL: 'test_scatter_16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX1-LABEL: 'test_scatter_16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void 
@llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'test_scatter_16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SKL-LABEL: 'test_scatter_16i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SKL-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; 
AVX512-LABEL: 'test_scatter_16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0 + %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer + + %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind + %imask = bitcast i16 %mask to <16 x i1> + call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) + ret void +} + +define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_8i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) + ret void +} + +define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; KNL-LABEL: 'test_scatter_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; 
SKX-LABEL: 'test_scatter_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) + ret void +} + +define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) { +; SSE2-LABEL: 'test_gather_4f32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for 
instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; +; SKL-LABEL: 'test_gather_4f32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; +; KNL-LABEL: 'test_gather_4f32' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; +; SKX-LABEL: 'test_gather_4f32' +; SKX-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; + %sext_ind = sext <4 x i32> %ind to <4 x i64> + %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind + + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) + ret <4 x float>%res +} + +define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) { +; SSE2-LABEL: 'test_gather_4f32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; +; SKL-LABEL: 'test_gather_4f32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; +; KNL-LABEL: 'test_gather_4f32_const_mask' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; +; SKX-LABEL: 'test_gather_4f32_const_mask' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res +; + %sext_ind = sext <4 x i32> %ind to <4 x i64> + %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind + + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) + ret <4 x float>%res +} + +declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>*, i32, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>) + +declare <8 x i64> 
@llvm.masked.load.v8i64.p0v8i64(<8 x i64>*, i32, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>*, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>*, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>) +declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>) +declare void @llvm.masked.store.v1f64.p0v1f64(<1 x double>, <1 x double>*, i32, <1 x i1>) + +declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>) +declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>) +declare void 
@llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) +declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>) + +declare void @llvm.masked.store.v8i64.p0v8i64(<8 x i64>, <8 x i64>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>) +declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>) +declare void @llvm.masked.store.v1i64.p0v1i64(<1 x i64>, <1 x i64>*, i32, <1 x i1>) + +declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>) +declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) +declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>) + +declare void @llvm.masked.store.v32i16.p0v32i16(<32 x i16>, <32 x i16>*, i32, <32 x i1>) +declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>) +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>) + +declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>) +declare void @llvm.masked.store.v32i8.p0v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>) +declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) +declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>) + +declare <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*>, i32, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*>, i32, <1 x i1>, <1 x double>) + +declare <16 x float> 
@llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*>, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*>, i32, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*>, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*>, i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*>, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double>, <8 x double*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 
x double>, <4 x double*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double>, <1 x double*>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float>, <2 x float*>, i32, <2 x i1>) + +declare void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64>, <8 x i64*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64>, <4 x i64*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64>, <2 x i64*>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64>, <1 x i64*>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>, <16 x i32*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32>, <2 x i32*>, i32, <2 x i1>) + +declare void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16>, <32 x i16*>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16>, <16 x i16*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x i1>) + +declare void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8>, <64 x i8*>, i32, <64 x i1>) +declare void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8>, <32 x i8*>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8>, <16 x i8*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8>, <8 x i8*>, 
i32, <8 x i1>) + +declare <8 x double> @llvm.masked.expandload.v8f64(double*, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.expandload.v4f64(double*, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.expandload.v2f64(double*, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.expandload.v1f64(double*, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.expandload.v16f32(float*, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.expandload.v8f32(float*, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.expandload.v4f32(float*, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.expandload.v2f32(float*, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.expandload.v8i64(i64*, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.expandload.v4i64(i64*, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.expandload.v2i64(i64*, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.expandload.v1i64(i64*, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.expandload.v16i32(i32*, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.expandload.v8i32(i32*, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.expandload.v4i32(i32*, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.expandload.v2i32(i32*, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.expandload.v8i16(i16*, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.expandload.v4i16(i16*, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.expandload.v16i8(i8*, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.expandload.v8i8(i8*, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.compressstore.v8f64(<8 x double>, double*, <8 x 
i1>) +declare void @llvm.masked.compressstore.v4f64(<4 x double>, double*, <4 x i1>) +declare void @llvm.masked.compressstore.v2f64(<2 x double>, double*, <2 x i1>) +declare void @llvm.masked.compressstore.v1f64(<1 x double>, double*, <1 x i1>) + +declare void @llvm.masked.compressstore.v16f32(<16 x float>, float*, <16 x i1>) +declare void @llvm.masked.compressstore.v8f32(<8 x float>, float*, <8 x i1>) +declare void @llvm.masked.compressstore.v4f32(<4 x float>, float*, <4 x i1>) +declare void @llvm.masked.compressstore.v2f32(<2 x float>, float*, <2 x i1>) + +declare void @llvm.masked.compressstore.v8i64(<8 x i64>, i64*, <8 x i1>) +declare void @llvm.masked.compressstore.v4i64(<4 x i64>, i64*, <4 x i1>) +declare void @llvm.masked.compressstore.v2i64(<2 x i64>, i64*, <2 x i1>) +declare void @llvm.masked.compressstore.v1i64(<1 x i64>, i64*, <1 x i1>) + +declare void @llvm.masked.compressstore.v16i32(<16 x i32>, i32*, <16 x i1>) +declare void @llvm.masked.compressstore.v8i32(<8 x i32>, i32*, <8 x i1>) +declare void @llvm.masked.compressstore.v4i32(<4 x i32>, i32*, <4 x i1>) +declare void @llvm.masked.compressstore.v2i32(<2 x i32>, i32*, <2 x i1>) + +declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>) +declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>) +declare void @llvm.masked.compressstore.v8i16(<8 x i16>, i16*, <8 x i1>) +declare void @llvm.masked.compressstore.v4i16(<4 x i16>, i16*, <4 x i1>) + +declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>) +declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>) +declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>) +declare void @llvm.masked.compressstore.v8i8(<8 x i8>, i8*, <8 x i1>) diff --git a/llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll new file mode 100644 index 0000000..bfbfcdb --- /dev/null +++ 
b/llvm/test/Analysis/CostModel/X86/uniformshift-inseltpoison.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -mattr=+sse2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s +; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s + +define <4 x i32> @shl(<4 x i32> %vector, i32 %scalar) { +entry: + ; SSE2: 'shl' + ; SSE2: cost of 1 {{.*}} shl + ; SSE2-CODEGEN: movd %edi, %xmm1 + ; SSE2-CODEGEN: pslld %xmm1, %xmm0 + %insert = insertelement <4 x i32> poison, i32 %scalar, i32 0 + %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer + %ret = shl <4 x i32> %vector , %splat + ret <4 x i32> %ret +} + +define <4 x i32> @ashr(<4 x i32> %vector, i32 %scalar) { +entry: + ; SSE2: 'ashr' + ; SSE2: cost of 1 {{.*}} ashr + ; SSE2-CODEGEN: movd %edi, %xmm1 + ; SSE2-CODEGEN: psrad %xmm1, %xmm0 + %insert = insertelement <4 x i32> poison, i32 %scalar, i32 0 + %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer + %ret = ashr <4 x i32> %vector , %splat + ret <4 x i32> %ret +} + +define <4 x i32> @lshr(<4 x i32> %vector, i32 %scalar) { +entry: + ; SSE2: 'lshr' + ; SSE2: cost of 1 {{.*}} lshr + ; SSE2-CODEGEN: movd %edi, %xmm1 + ; SSE2-CODEGEN: psrld %xmm1, %xmm0 + %insert = insertelement <4 x i32> poison, i32 %scalar, i32 0 + %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer + %ret = lshr <4 x i32> %vector , %splat + ret <4 x i32> %ret +} + diff --git a/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll new file mode 100644 index 0000000..f6f5ed8 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/vector-insert-inseltpoison.ll @@ -0,0 +1,1270 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse2 | FileCheck %s 
--check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE4 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE4 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX + +define i32 @insert_double(i32 %arg) { +; SSE-LABEL: 'insert_double' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = 
insertelement <4 x double> poison, double undef, i32 %arg +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_7 = insertelement <8 x double> poison, double undef, i32 7 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'insert_double' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3 +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_7 = insertelement <8 x double> poison, double undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'insert_double' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_3 = 
insertelement <8 x double> poison, double undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_7 = insertelement <8 x double> poison, double undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg + %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0 + %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1 + + %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg + %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0 + %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3 + + %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg + %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0 + %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3 + %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4 + %v8f64_7 = insertelement <8 x double> poison, double undef, i32 7 + + ret i32 undef +} + +define i32 @insert_float(i32 %arg) { +; SSE2-LABEL: 'insert_float' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'insert_float' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> poison, float 
undef, i32 1 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'insert_float' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 
8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE4-LABEL: 'insert_float' +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = 
insertelement <16 x float> poison, float undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 +; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 +; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'insert_float' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 
3 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'insert_float' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, 
i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'insert_float' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'insert_float' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 +; GLM-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg + %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 + %v2f32_1 = insertelement <2 x 
float> poison, float undef, i32 1 + + %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg + %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 + %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 + + %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg + %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 + %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 + %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 + %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 + + %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg + %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 + %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 + %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 + %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 + + ret i32 undef +} + +define i32 @insert_i64(i32 %arg) { +; SSE2-LABEL: 'insert_i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'insert_i64' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = 
insertelement <8 x i64> poison, i64 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'insert_i64' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE4-LABEL: 'insert_i64' +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> 
poison, i64 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 +; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'insert_i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; 
AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'insert_i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'insert_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 +; SLM-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'insert_i64' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg + %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 + %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 + + %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg + %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 + %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 
3 + + %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg + %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 + %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 + %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 + %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 + + ret i32 undef +} + +define i32 @insert_i32(i32 %arg) { +; SSE2-LABEL: 'insert_i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 
%arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'insert_i32' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; 
SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'insert_i32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE4-LABEL: 'insert_i32' +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; 
SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'insert_i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; AVX-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'insert_i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'insert_i32' +; SLM-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; SLM-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'insert_i32' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg + %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 + %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 + + %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg + %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 + %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 + + %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg + %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 + %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 + %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 + %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 + + %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg + %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 + %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 + %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 + %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 + + ret i32 undef +} + +define i32 @insert_i16(i32 %arg) { +; SSE-LABEL: 'insert_i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'insert_i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'insert_i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 +; AVX512-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg + %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 + %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 + + %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg + %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 + %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 + + %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg + %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 + %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 + + %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg + %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 + %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 + %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 + %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 + + %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg + %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 + %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 + %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 + %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 + %v32i16_16 = 
insertelement <32 x i16> poison, i16 undef, i32 16 + %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 + %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 + + ret i32 undef +} + +define i32 @insert_i8(i32 %arg) { +; SSE2-LABEL: 'insert_i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'insert_i8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: 
%v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: 
%v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'insert_i8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = insertelement <16 x i8> 
poison, i8 undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_31 = insertelement <64 x i8> 
poison, i8 undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE4-LABEL: 'insert_i8' +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; SSE4-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'insert_i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = 
insertelement <16 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_24 = insertelement <64 x i8> poison, 
i8 undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'insert_i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 
undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'insert_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 +; SLM-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'insert_i8' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 
undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; GLM-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg + %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 + %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 + + %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg + %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 + %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 + + %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg + %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 + %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 + + %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg + %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 + %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 + %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 + + %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg + %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 + %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 + %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 + %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 + %v32i8_24 = insertelement 
<32 x i8> poison, i8 undef, i32 24 + %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 + + %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg + %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 + %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 + %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 + %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 + %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 + %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 + %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 + %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 + %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 + + ret i32 undef +} + +define i32 @insert_i1(i32 %arg) { +; SSE2-LABEL: 'insert_i1' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'insert_i1' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for 
instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'insert_i1' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = 
insertelement <8 x i1> poison, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_7 = 
insertelement <64 x i1> poison, i1 undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE4-LABEL: 'insert_i1' +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 
undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; 
SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'insert_i1' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = 
insertelement <64 x i1> poison, i1 undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'insert_i1' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 
undef, i32 2 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> 
poison, i1 undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'insert_i1' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; SLM-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'insert_i1' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = 
insertelement <4 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, 
i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg + %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 + %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 + + %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg + %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 + %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 + + %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg + %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 + %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 + + %v16i1_a = insertelement <16 x i1> 
poison, i1 undef, i32 %arg + %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 + %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 + %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 + + %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg + %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 + %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 + %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 + %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 + %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 + %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 + + %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg + %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 + %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 + %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 + %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 + %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 + %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 + %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 + %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 + %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 + + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/X86/vector_gep-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vector_gep-inseltpoison.ll new file mode 100644 index 0000000..8e8d6cb --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/vector_gep-inseltpoison.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-linux-unknown-unknown -mattr=+avx512f | FileCheck %s + +%struct.S = type { [1000 x i32] } + + +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) + +define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){ + %temp = insertelement <4 x i64> poison, i64 %base, i32 0 + %vector = shufflevector <4 x i64> %temp, <4 x i64> undef, <4 x 
i32> zeroinitializer +;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds %struct.S + %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer +;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32] + %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + ret <4 x i32> %res +} diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll new file mode 100644 index 0000000..47a3687 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll @@ -0,0 +1,1843 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s
--check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VL +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BWVL +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2 + +; Verify the cost of vector arithmetic shift right instructions. 
+ +; +; Variable Shifts +; + +define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) { +; SSE-LABEL: 'var_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <2 x i64> %a, %b +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX1-LABEL: 'var_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <2 x i64> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX2-LABEL: 'var_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'var_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'var_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; BTVER2-LABEL: 'var_shift_v2i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <2 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %shift = ashr <2 x i64> %a, %b + ret <2 x i64> %shift +} + +define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { +; SSE-LABEL: 'var_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <4 x i64> %a, %b +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'var_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <4 x i64> %a, %b +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'var_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOP-LABEL: 'var_shift_v4i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'var_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'var_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <4 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; + %shift = ashr <4 x i64> %a, %b + ret <4 x i64> %shift +} + +define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { +; SSE-LABEL: 'var_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <8 x i64> %a, %b +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'var_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <8 x i64> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'var_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOP-LABEL: 'var_shift_v8i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %b +; XOP-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'var_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'var_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <8 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %shift = ashr <8 x i64> %a, %b + ret <8 x i64> %shift +} + +define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: 'var_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <4 x i32> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'var_shift_v4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i32> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX1-LABEL: 'var_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i32> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'var_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; XOPAVX1-LABEL: 'var_shift_v4i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; XOPAVX2-LABEL: 'var_shift_v4i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, 
%b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'var_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; BTVER2-LABEL: 'var_shift_v4i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %shift = ashr <4 x i32> %a, %b + ret <4 x i32> %shift +} + +define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { +; SSE2-LABEL: 'var_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <8 x i32> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'var_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i32> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'var_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i32> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'var_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'var_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'var_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = 
ashr <8 x i32> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'var_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'var_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %shift = ashr <8 x i32> %a, %b + ret <8 x i32> %shift +} + +define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { +; SSE2-LABEL: 'var_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %shift = ashr <16 x i32> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'var_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <16 x i32> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'var_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <16 x i32> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'var_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'var_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'var_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'var_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'var_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <16 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %shift = ashr <16 x i32> %a, %b + ret <16 x i32> %shift +} + +define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { +; SSE2-LABEL: 'var_shift_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <8 x i16> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; SSE42-LABEL: 'var_shift_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX-LABEL: 'var_shift_v8i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; XOP-LABEL: 'var_shift_v8i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512F-LABEL: 'var_shift_v8i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512BW-LABEL: 'var_shift_v8i16' +; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512VL-LABEL: 'var_shift_v8i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v8i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %shift = ashr <8 x i16> %a, %b + ret <8 x i16> %shift +} + +define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { +; SSE2-LABEL: 'var_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %shift = ashr <16 x i16> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'var_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = ashr <16 x i16> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'var_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOP-LABEL: 'var_shift_v16i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> 
%shift +; +; AVX512F-LABEL: 'var_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'var_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512VL-LABEL: 'var_shift_v16i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v16i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <16 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; + %shift = ashr <16 x i16> %a, %b + ret <16 x i16> %shift +} + +define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { +; SSE2-LABEL: 'var_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %shift = ashr <32 x i16> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'var_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %shift = ashr <32 x i16> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'var_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <32 x i16> %a, %b +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOP-LABEL: 'var_shift_v32i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'var_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'var_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'var_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <32 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %shift = ashr <32 x i16> %a, %b + ret <32 x i16> %shift +} + +define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { +; SSE2-LABEL: 'var_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 54 for instruction: %shift = ashr <16 x i8> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'var_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'var_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'var_shift_v16i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'var_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %shift = ashr <16 x i8> %a, %b + ret <16 x i8> %shift +} + +define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { +; SSE2-LABEL: 'var_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %shift = ashr <32 x i8> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'var_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <32 x i8> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'var_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOP-LABEL: 'var_shift_v32i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'var_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %shift = ashr <32 x i8> %a, %b + ret <32 x i8> %shift +} + +define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { +; SSE2-LABEL: 'var_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %shift = ashr <64 x i8> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'var_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %shift = ashr <64 x i8> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'var_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOP-LABEL: 'var_shift_v64i8' +; XOP-NEXT: Cost Model: Found 
an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'var_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %shift = ashr <64 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %shift = ashr <64 x i8> %a, %b + ret <64 x i8> %shift +} + +; +; Uniform Variable Shifts +; + +define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { +; SSE2-LABEL: 'splatvar_shift_v2i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; SSE42-LABEL: 'splatvar_shift_v2i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX-LABEL: 'splatvar_shift_v2i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'splatvar_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'splatvar_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %insert = insertelement <2 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer + %shift = ashr <2 x i64> %a, %splat + ret <2 x i64> %shift +} + +define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { +; SSE2-LABEL: 'splatvar_shift_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; SSE42-LABEL: 'splatvar_shift_v4i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatvar_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost 
of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatvar_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatvar_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; + %insert = insertelement <4 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer + %shift = ashr <4 x i64> %a, %splat + ret <4 x i64> %shift +} + +define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { +; SSE2-LABEL: 'splatvar_shift_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; SSE42-LABEL: 'splatvar_shift_v8i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatvar_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatvar_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; 
XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatvar_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %insert = insertelement <8 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer + %shift = ashr <8 x i64> %a, %splat + ret <8 x i64> %shift +} + +define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { +; SSE2-LABEL: 'splatvar_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; SSE2-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'splatvar_shift_v4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX-LABEL: 'splatvar_shift_v4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; XOP-LABEL: 'splatvar_shift_v4i32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'splatvar_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %insert = insertelement <4 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer + %shift = ashr <4 x i32> %a, %splat + ret <4 x i32> %shift +} + +define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { +; SSE2-LABEL: 'splatvar_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'splatvar_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatvar_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatvar_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatvar_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %insert = insertelement <8 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer + %shift = ashr <8 x i32> %a, %splat + ret <8 x i32> %shift +} + +define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { +; SSE2-LABEL: 'splatvar_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'splatvar_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatvar_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> 
%insert, <16 x i32> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %insert = insertelement <16 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer + %shift = ashr <16 x i32> %a, %splat + ret <16 x i32> %shift +} + +define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { +; SSE2-LABEL: 'splatvar_shift_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; SSE42-LABEL: 'splatvar_shift_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX-LABEL: 'splatvar_shift_v8i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; XOP-LABEL: 'splatvar_shift_v8i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512-LABEL: 'splatvar_shift_v8i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector 
<8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %insert = insertelement <8 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer + %shift = ashr <8 x i16> %a, %splat + ret <8 x i16> %shift +} + +define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { +; SSE2-LABEL: 'splatvar_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'splatvar_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatvar_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 
for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; + %insert = insertelement <16 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer + %shift = ashr <16 x i16> %a, %splat + ret <16 x i16> %shift +} + +define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { +; SSE2-LABEL: 'splatvar_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'splatvar_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> 
zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatvar_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatvar_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, 
i16 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %insert = insertelement <32 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer + %shift = ashr <32 x i16> %a, %splat + ret <32 x i16> %shift +} + +define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { +; SSE2-LABEL: 'splatvar_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'splatvar_shift_v16i8' +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatvar_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'splatvar_shift_v16i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %insert = insertelement <16 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer + %shift = ashr <16 x i8> %a, %splat + ret <16 x i8> %shift +} + +define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { +; SSE2-LABEL: 'splatvar_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'splatvar_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatvar_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v32i8' +; AVX2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %insert = insertelement <32 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer + %shift = ashr <32 x i8> %a, %splat + ret <32 x i8> %shift +} + +define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { +; SSE2-LABEL: 'splatvar_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'splatvar_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %shift = ashr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatvar_shift_v64i8' +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, %splat 
+; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = 
shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %shift = ashr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %insert = insertelement <64 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer + %shift = ashr <64 x i8> %a, %splat + ret <64 x i8> %shift +} + +; +; Constant Shifts +; + +define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) { +; SSE-LABEL: 'constant_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX1-LABEL: 'constant_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <2 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX2-LABEL: 'constant_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'constant_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'constant_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; BTVER2-LABEL: 'constant_shift_v2i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <2 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %shift = ashr <2 x i64> %a, + ret <2 x i64> %shift +} + +define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { +; SSE-LABEL: 'constant_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'constant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'constant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOP-LABEL: 'constant_shift_v4i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'constant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'constant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; + %shift = ashr <4 x i64> %a, + ret <4 x i64> %shift +} + +define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { +; SSE-LABEL: 'constant_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'constant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'constant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOP-LABEL: 'constant_shift_v8i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'constant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'constant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %shift = ashr <8 x i64> %a, + ret <8 x i64> %shift +} + +define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) { +; SSE2-LABEL: 'constant_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <4 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'constant_shift_v4i32' +; SSE42-NEXT: 
Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX1-LABEL: 'constant_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'constant_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; XOP-LABEL: 'constant_shift_v4i32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'constant_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; BTVER2-LABEL: 'constant_shift_v4i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %shift = ashr <4 x i32> %a, + ret <4 x i32> %shift +} + +define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { +; SSE2-LABEL: 'constant_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'constant_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'constant_shift_v8i32' +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'constant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'constant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'constant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %shift = ashr <8 x i32> %a, + ret <8 x i32> %shift +} + +define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) { +; SSE2-LABEL: 'constant_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %shift = ashr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'constant_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; 
AVX1-LABEL: 'constant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'constant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'constant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'constant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %shift = ashr <16 x i32> %a, + ret <16 x i32> %shift +} + +define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { +; SSE2-LABEL: 'constant_shift_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = ashr <8 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; SSE42-LABEL: 'constant_shift_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret <8 x i16> %shift +; +; AVX-LABEL: 'constant_shift_v8i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; XOP-LABEL: 'constant_shift_v8i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512F-LABEL: 'constant_shift_v8i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512BW-LABEL: 'constant_shift_v8i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512VL-LABEL: 'constant_shift_v8i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v8i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %shift = ashr <8 x i16> %a, + ret <8 x i16> %shift +} + +define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { +; SSE2-LABEL: 'constant_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %shift = ashr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'constant_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = ashr <16 x i16> %a, +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'constant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'constant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOP-LABEL: 'constant_shift_v16i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'constant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'constant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512VL-LABEL: 'constant_shift_v16i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v16i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'constant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: ret <16 x i16> %shift +; + %shift = ashr <16 x i16> %a, + ret <16 x i16> %shift +} + +define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { +; SSE2-LABEL: 'constant_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %shift = ashr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'constant_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %shift = ashr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'constant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'constant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOP-LABEL: 'constant_shift_v32i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'constant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'constant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'constant_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x 
i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'constant_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %shift = ashr <32 x i16> %a, + ret <32 x i16> %shift +} + +define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { +; SSE2-LABEL: 'constant_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %shift = ashr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'constant_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'constant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'constant_shift_v16i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'constant_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %shift = ashr <16 x i8> %a, + ret <16 x i8> %shift +} + +define <32 x i8> 
@constant_shift_v32i8(<32 x i8> %a) { +; SSE2-LABEL: 'constant_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %shift = ashr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'constant_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'constant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOP-LABEL: 'constant_shift_v32i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'constant_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'constant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %shift = ashr <32 x i8> %a, + ret <32 x i8> %shift +} + +define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { +; SSE2-LABEL: 'constant_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %shift = ashr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost 
of 0 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'constant_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %shift = ashr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'constant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOP-LABEL: 'constant_shift_v64i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'constant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 
'constant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %shift = ashr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %shift = ashr <64 x i8> %a, + ret <64 x i8> %shift +} + +; +; Uniform Constant Shifts +; + +define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { +; SSE-LABEL: 'splatconstant_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX-LABEL: 'splatconstant_shift_v2i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'splatconstant_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %shift = ashr <2 x i64> %a, + ret <2 x i64> %shift +} + +define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { +; SSE-LABEL: 'splatconstant_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOP-LABEL: 'splatconstant_shift_v4i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; + %shift = ashr <4 x i64> %a, + ret <4 x i64> %shift +} + +define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { +; SSE-LABEL: 'splatconstant_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOP-LABEL: 'splatconstant_shift_v8i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 
'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %shift = ashr <8 x i64> %a, + ret <8 x i64> %shift +} + +define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { +; CHECK-LABEL: 'splatconstant_shift_v4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %shift = ashr <4 x i32> %a, + ret <4 x i32> %shift +} + +define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { +; SSE-LABEL: 'splatconstant_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %shift = ashr <8 x i32> %a, + ret <8 x i32> %shift +} + +define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { +; SSE-LABEL: 'splatconstant_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift 
+; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %shift = ashr <16 x i32> %a, + ret <16 x i32> %shift +} + +define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { +; CHECK-LABEL: 'splatconstant_shift_v8i16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %shift = ashr <8 x i16> %a, + ret <8 x i16> %shift +} + +define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { +; SSE-LABEL: 'splatconstant_shift_v16i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 
'splatconstant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; + %shift = ashr <16 x i16> %a, + ret <16 x i16> %shift +} + +define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { +; SSE-LABEL: 'splatconstant_shift_v32i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %shift = ashr <32 x i16> %a, + ret <32 x i16> %shift +} + +define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { +; SSE-LABEL: 'splatconstant_shift_v16i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatconstant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'splatconstant_shift_v16i8' +; XOP-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %shift = ashr <16 x i8> %a, + ret <16 x i8> %shift +} + +define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { +; SSE-LABEL: 'splatconstant_shift_v32i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOP-LABEL: 'splatconstant_shift_v32i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %shift = ashr 
<32 x i8> %a, + ret <32 x i8> %shift +} + +define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { +; SSE-LABEL: 'splatconstant_shift_v64i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOP-LABEL: 'splatconstant_shift_v64i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %shift = ashr <64 x i8> %a, + ret <64 x i8> %shift +} diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll new file mode 100644 index 0000000..2af629b --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll @@ -0,0 +1,1867 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW 
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VL +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BWVL +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2 + +; Verify the cost of vector logical shift right instructions. + +; +; Variable Shifts +; + +define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) { +; SSE-LABEL: 'var_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <2 x i64> %a, %b +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX1-LABEL: 'var_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <2 x i64> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX2-LABEL: 'var_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOPAVX1-LABEL: 'var_shift_v2i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOPAVX2-LABEL: 
'var_shift_v2i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'var_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; BTVER2-LABEL: 'var_shift_v2i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <2 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %shift = lshr <2 x i64> %a, %b + ret <2 x i64> %shift +} + +define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { +; SSE-LABEL: 'var_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <4 x i64> %a, %b +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'var_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <4 x i64> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'var_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'var_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'var_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift 
+; +; AVX512-LABEL: 'var_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'var_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <4 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; + %shift = lshr <4 x i64> %a, %b + ret <4 x i64> %shift +} + +define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { +; SSE-LABEL: 'var_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <8 x i64> %a, %b +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'var_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <8 x i64> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'var_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'var_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'var_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'var_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret <8 x i64> %shift +; +; BTVER2-LABEL: 'var_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <8 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %shift = lshr <8 x i64> %a, %b + ret <8 x i64> %shift +} + +define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: 'var_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <4 x i32> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'var_shift_v4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <4 x i32> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX1-LABEL: 'var_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <4 x i32> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'var_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; XOPAVX1-LABEL: 'var_shift_v4i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; XOPAVX2-LABEL: 'var_shift_v4i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'var_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: ret <4 x i32> %shift +; +; BTVER2-LABEL: 'var_shift_v4i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <4 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %shift = lshr <4 x i32> %a, %b + ret <4 x i32> %shift +} + +define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { +; SSE2-LABEL: 'var_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = lshr <8 x i32> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'var_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <8 x i32> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'var_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = lshr <8 x i32> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'var_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'var_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'var_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'var_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %b +; AVX512-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'var_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = lshr <8 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %shift = lshr <8 x i32> %a, %b + ret <8 x i32> %shift +} + +define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { +; SSE2-LABEL: 'var_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %shift = lshr <16 x i32> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'var_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = lshr <16 x i32> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'var_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = lshr <16 x i32> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'var_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'var_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'var_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'var_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x 
i32> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'var_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = lshr <16 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %shift = lshr <16 x i32> %a, %b + ret <16 x i32> %shift +} + +define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { +; SSE2-LABEL: 'var_shift_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = lshr <8 x i16> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; SSE42-LABEL: 'var_shift_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX-LABEL: 'var_shift_v8i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; XOP-LABEL: 'var_shift_v8i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512F-LABEL: 'var_shift_v8i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512BW-LABEL: 'var_shift_v8i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512VL-LABEL: 'var_shift_v8i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for 
instruction: %shift = lshr <8 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v8i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %shift = lshr <8 x i16> %a, %b + ret <8 x i16> %shift +} + +define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { +; SSE2-LABEL: 'var_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %shift = lshr <16 x i16> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'var_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <16 x i16> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'var_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOP-LABEL: 'var_shift_v16i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'var_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'var_shift_v16i16' +; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512VL-LABEL: 'var_shift_v16i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v16i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = lshr <16 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; + %shift = lshr <16 x i16> %a, %b + ret <16 x i16> %shift +} + +define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { +; SSE2-LABEL: 'var_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %shift = lshr <32 x i16> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'var_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %shift = lshr <32 x i16> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'var_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret <32 x i16> %shift +; +; XOP-LABEL: 'var_shift_v32i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'var_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'var_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'var_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = lshr <32 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %shift = lshr <32 x i16> %a, %b + ret <32 x i16> %shift +} + +define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { +; SSE2-LABEL: 'var_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <16 x i8> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'var_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> 
%a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'var_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'var_shift_v16i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'var_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %shift = lshr <16 x i8> %a, %b + ret <16 x i8> %shift +} + +define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { +; SSE2-LABEL: 'var_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <32 x i8> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'var_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = lshr <32 x i8> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'var_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOP-LABEL: 'var_shift_v32i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b +; 
XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'var_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %shift = lshr <32 x i8> %a, %b + ret <32 x i8> %shift +} + +define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { +; SSE2-LABEL: 'var_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %shift = lshr <64 x i8> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'var_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = lshr <64 x i8> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'var_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOP-LABEL: 'var_shift_v64i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'var_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> 
%a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %shift = lshr <64 x i8> %a, %b + ret <64 x i8> %shift +} + +; +; Uniform Variable Shifts +; + +define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { +; SSE2-LABEL: 'splatvar_shift_v2i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; SSE42-LABEL: 'splatvar_shift_v2i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX-LABEL: 'splatvar_shift_v2i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'splatvar_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'splatvar_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %insert = insertelement <2 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, 
<2 x i32> zeroinitializer + %shift = lshr <2 x i64> %a, %splat + ret <2 x i64> %shift +} + +define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { +; SSE2-LABEL: 'splatvar_shift_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; SSE42-LABEL: 'splatvar_shift_v4i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatvar_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatvar_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector 
<4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatvar_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; + %insert = insertelement <4 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer + %shift = lshr <4 x i64> %a, %splat + ret <4 x i64> %shift +} + +define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { +; SSE2-LABEL: 'splatvar_shift_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; SSE42-LABEL: 'splatvar_shift_v8i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatvar_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = 
shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatvar_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatvar_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %insert = insertelement <8 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer + %shift = lshr <8 x i64> %a, %splat + ret <8 x i64> %shift +} + +define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { +; SSE2-LABEL: 'splatvar_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'splatvar_shift_v4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX-LABEL: 'splatvar_shift_v4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; XOP-LABEL: 'splatvar_shift_v4i32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'splatvar_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %insert = insertelement <4 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, 
<4 x i32> zeroinitializer + %shift = lshr <4 x i32> %a, %splat + ret <4 x i32> %shift +} + +define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { +; SSE2-LABEL: 'splatvar_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'splatvar_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatvar_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatvar_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector 
<8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatvar_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %insert = insertelement <8 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer + %shift = lshr <8 x i32> %a, %splat + ret <8 x i32> %shift +} + +define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { +; SSE2-LABEL: 'splatvar_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'splatvar_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatvar_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i32' +; AVX512-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %insert = insertelement <16 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer + %shift = lshr <16 x i32> %a, %splat + ret <16 x i32> %shift +} + +define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { +; SSE2-LABEL: 'splatvar_shift_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; SSE42-LABEL: 'splatvar_shift_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX-LABEL: 'splatvar_shift_v8i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; XOP-LABEL: 'splatvar_shift_v8i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512-LABEL: 'splatvar_shift_v8i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %insert = insertelement <8 x i16> poison, i16 %b, i32 0 
+ %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer + %shift = lshr <8 x i16> %a, %splat + ret <8 x i16> %shift +} + +define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { +; SSE2-LABEL: 'splatvar_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'splatvar_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatvar_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift 
+; +; BTVER2-LABEL: 'splatvar_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; + %insert = insertelement <16 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer + %shift = lshr <16 x i16> %a, %splat + ret <16 x i16> %shift +} + +define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { +; SSE2-LABEL: 'splatvar_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'splatvar_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatvar_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatvar_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector 
<32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %insert = insertelement <32 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer + %shift = lshr <32 x i16> %a, %splat + ret <32 x i16> %shift +} + +define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { +; SSE2-LABEL: 'splatvar_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'splatvar_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatvar_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'splatvar_shift_v16i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %insert = insertelement <16 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer + %shift = lshr <16 x i8> %a, %splat + ret <16 x i8> %shift +} + +define <32 x i8> 
@splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { +; SSE2-LABEL: 'splatvar_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'splatvar_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = lshr <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatvar_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated 
cost of 11 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %insert = insertelement <32 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer + %shift = lshr <32 x i8> %a, %splat + ret <32 x i8> %shift +} + +define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { +; SSE2-LABEL: 'splatvar_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'splatvar_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = lshr <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatvar_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: 
Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; 
BTVER2-LABEL: 'splatvar_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %insert = insertelement <64 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer + %shift = lshr <64 x i8> %a, %splat + ret <64 x i8> %shift +} + +; +; Constant Shifts +; + +define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) { +; SSE-LABEL: 'constant_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX1-LABEL: 'constant_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <2 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX2-LABEL: 'constant_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'constant_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'constant_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; 
BTVER2-LABEL: 'constant_shift_v2i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <2 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %shift = lshr <2 x i64> %a, + ret <2 x i64> %shift +} + +define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { +; SSE-LABEL: 'constant_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'constant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'constant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'constant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'constant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret 
<4 x i64> %shift +; + %shift = lshr <4 x i64> %a, + ret <4 x i64> %shift +} + +define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { +; SSE-LABEL: 'constant_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'constant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'constant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'constant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'constant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %shift = lshr <8 x i64> %a, + ret <8 x i64> %shift +} + +define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) { +; SSE2-LABEL: 'constant_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 16 for instruction: %shift = lshr <4 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'constant_shift_v4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <4 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX1-LABEL: 'constant_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'constant_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; XOP-LABEL: 'constant_shift_v4i32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'constant_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; BTVER2-LABEL: 'constant_shift_v4i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <4 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %shift = lshr <4 x i32> %a, + ret <4 x i32> %shift +} + +define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { +; SSE2-LABEL: 'constant_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = lshr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'constant_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 22 for instruction: %shift = lshr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'constant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'constant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'constant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'constant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = lshr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %shift = lshr <8 x i32> %a, + ret <8 x i32> %shift +} + +define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) { +; SSE2-LABEL: 'constant_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %shift = lshr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'constant_shift_v16i32' +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = lshr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'constant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'constant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'constant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'constant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = lshr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %shift = lshr <16 x i32> %a, + ret <16 x i32> %shift +} + +define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { +; SSE2-LABEL: 'constant_shift_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = lshr <8 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift 
+; +; SSE42-LABEL: 'constant_shift_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX-LABEL: 'constant_shift_v8i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; XOP-LABEL: 'constant_shift_v8i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512F-LABEL: 'constant_shift_v8i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512BW-LABEL: 'constant_shift_v8i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512VL-LABEL: 'constant_shift_v8i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v8i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %shift = lshr <8 x i16> %a, + ret <8 x i16> %shift +} + +define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { +; SSE2-LABEL: 'constant_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %shift = lshr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'constant_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'constant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'constant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOP-LABEL: 'constant_shift_v16i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'constant_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'constant_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512VL-LABEL: 'constant_shift_v16i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v16i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> 
%shift +; +; BTVER2-LABEL: 'constant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = lshr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; + %shift = lshr <16 x i16> %a, + ret <16 x i16> %shift +} + +define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { +; SSE2-LABEL: 'constant_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %shift = lshr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'constant_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %shift = lshr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'constant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'constant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOP-LABEL: 'constant_shift_v32i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'constant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'constant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'constant_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'constant_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = lshr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %shift = lshr <32 x i16> %a, + ret <32 x i16> %shift +} + +define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { +; SSE2-LABEL: 'constant_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'constant_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'constant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'constant_shift_v16i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'constant_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x 
i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %shift = lshr <16 x i8> %a, + ret <16 x i8> %shift +} + +define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { +; SSE2-LABEL: 'constant_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'constant_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = lshr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'constant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOP-LABEL: 'constant_shift_v32i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'constant_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'constant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %shift = lshr <32 x i8> %a, + ret <32 x i8> %shift +} + +define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { +; 
SSE2-LABEL: 'constant_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %shift = lshr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'constant_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = lshr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'constant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOP-LABEL: 'constant_shift_v64i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'constant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an 
estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'constant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %shift = lshr <64 x i8> %a, + ret <64 x i8> %shift +} + +; +; Uniform Constant Shifts +; + +define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { +; CHECK-LABEL: 'splatconstant_shift_v2i64' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %shift = lshr <2 x i64> %a, + ret <2 x i64> %shift +} + +define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { +; SSE-LABEL: 'splatconstant_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; + %shift = lshr <4 x i64> %a, + ret <4 x i64> %shift +} + +define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { +; SSE-LABEL: 'splatconstant_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 
'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %shift = lshr <8 x i64> %a, + ret <8 x i64> %shift +} + +define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { +; CHECK-LABEL: 'splatconstant_shift_v4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %shift = lshr <4 x i32> %a, + ret <4 x i32> %shift +} + +define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { +; SSE-LABEL: 'splatconstant_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %shift = lshr <8 x i32> %a, + ret <8 x i32> %shift +} + +define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { +; SSE-LABEL: 'splatconstant_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift 
+; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %shift = lshr <16 x i32> %a, + ret <16 x i32> %shift +} + +define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { +; CHECK-LABEL: 'splatconstant_shift_v8i16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %shift = lshr <8 x i16> %a, + ret <8 x i16> %shift +} + +define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { +; SSE-LABEL: 'splatconstant_shift_v16i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 
'splatconstant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; + %shift = lshr <16 x i16> %a, + ret <16 x i16> %shift +} + +define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { +; SSE-LABEL: 'splatconstant_shift_v32i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %shift = lshr <32 x i16> %a, + ret <32 x i16> %shift +} + +define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { +; SSE-LABEL: 'splatconstant_shift_v16i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatconstant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'splatconstant_shift_v16i8' +; XOP-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %shift = lshr <16 x i8> %a, + ret <16 x i8> %shift +} + +define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { +; SSE-LABEL: 'splatconstant_shift_v32i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; 
BTVER2-LABEL: 'splatconstant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %shift = lshr <32 x i8> %a, + ret <32 x i8> %shift +} + +define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { +; SSE-LABEL: 'splatconstant_shift_v64i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %shift = lshr <64 x i8> %a, + ret <64 x i8> %shift +} diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll new file mode 100644 index 0000000..cf6144e --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll @@ -0,0 +1,2197 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+xop,+avx | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model 
-analyze -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=CHECK,XOP,XOPAVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VL +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BWVL +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,BTVER2 + +; Verify the cost of vector shift left instructions. 
+ +; +; Variable Shifts +; + +define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) { +; SSE-LABEL: 'var_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <2 x i64> %a, %b +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX1-LABEL: 'var_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <2 x i64> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX2-LABEL: 'var_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'var_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'var_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; BTVER2-LABEL: 'var_shift_v2i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <2 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %shift = shl <2 x i64> %a, %b + ret <2 x i64> %shift +} + +define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { +; SSE-LABEL: 'var_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <4 x i64> %a, %b +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'var_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <4 x i64> %a, %b +; AVX1-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'var_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'var_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'var_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'var_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'var_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <4 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; + %shift = shl <4 x i64> %a, %b + ret <4 x i64> %shift +} + +define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { +; SSE-LABEL: 'var_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <8 x i64> %a, %b +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'var_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <8 x i64> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'var_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %b +; AVX2-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'var_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'var_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'var_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'var_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <8 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %shift = shl <8 x i64> %a, %b + ret <8 x i64> %shift +} + +define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: 'var_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <4 x i32> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'var_shift_v4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i32> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX1-LABEL: 'var_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i32> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'var_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %b +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; XOP-LABEL: 'var_shift_v4i32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'var_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; BTVER2-LABEL: 'var_shift_v4i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %shift = shl <4 x i32> %a, %b + ret <4 x i32> %shift +} + +define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { +; SSE2-LABEL: 'var_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <8 x i32> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'var_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i32> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'var_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i32> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'var_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'var_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %b +; 
XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'var_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'var_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'var_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %shift = shl <8 x i32> %a, %b + ret <8 x i32> %shift +} + +define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { +; SSE2-LABEL: 'var_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <16 x i32> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'var_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <16 x i32> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'var_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <16 x i32> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'var_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'var_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%shift = shl <16 x i32> %a, %b +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'var_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %b +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'var_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'var_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <16 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %shift = shl <16 x i32> %a, %b + ret <16 x i32> %shift +} + +define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { +; SSE2-LABEL: 'var_shift_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %shift = shl <8 x i16> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; SSE42-LABEL: 'var_shift_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX-LABEL: 'var_shift_v8i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; XOP-LABEL: 'var_shift_v8i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512F-LABEL: 'var_shift_v8i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 
14 for instruction: %shift = shl <8 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512BW-LABEL: 'var_shift_v8i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512VL-LABEL: 'var_shift_v8i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v8i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %shift = shl <8 x i16> %a, %b + ret <8 x i16> %shift +} + +define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { +; SSE2-LABEL: 'var_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %shift = shl <16 x i16> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'var_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <16 x i16> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'var_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = shl <16 x i16> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOP-LABEL: 'var_shift_v16i16' +; 
XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512F-LABEL: 'var_shift_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BW-LABEL: 'var_shift_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512VL-LABEL: 'var_shift_v16i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v16i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = shl <16 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; + %shift = shl <16 x i16> %a, %b + ret <16 x i16> %shift +} + +define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { +; SSE2-LABEL: 'var_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %shift = shl <32 x i16> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'var_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %shift = shl <32 x i16> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'var_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = shl <32 x i16> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOP-LABEL: 'var_shift_v32i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'var_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'var_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'var_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = shl <32 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %shift = shl <32 x i16> %a, 
%b + ret <32 x i16> %shift +} + +define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { +; SSE2-LABEL: 'var_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = shl <16 x i8> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'var_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'var_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'var_shift_v16i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'var_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %shift = shl <16 x i8> %a, %b + ret <16 x i8> %shift +} + +define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { +; SSE2-LABEL: 'var_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = shl <32 x i8> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'var_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <32 x i8> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'var_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, %b +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOP-LABEL: 'var_shift_v32i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'var_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %b +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %shift = shl <32 x i8> %a, %b + ret <32 x i8> %shift +} + +define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { +; SSE2-LABEL: 'var_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %shift = shl <64 x i8> %a, %b +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'var_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = shl <64 x i8> %a, %b +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'var_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = shl <64 x i8> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOP-LABEL: 'var_shift_v64i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, %b +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'var_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = shl <64 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %shift = shl <64 x i8> %a, %b + ret <64 x i8> %shift +} + +; +; Uniform Variable Shifts +; + +define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) { +; SSE2-LABEL: 'splatvar_shift_v2i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x 
i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; SSE42-LABEL: 'splatvar_shift_v2i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX-LABEL: 'splatvar_shift_v2i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'splatvar_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'splatvar_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> poison, i64 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %insert = insertelement <2 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer + %shift = shl <2 x i64> %a, %splat + ret <2 x i64> %shift +} + +define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { +; SSE2-LABEL: 'splatvar_shift_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; SSE42-LABEL: 'splatvar_shift_v4i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatvar_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatvar_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatvar_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> poison, i64 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; + %insert = insertelement <4 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer + %shift = shl <4 x i64> %a, %splat + ret <4 x i64> %shift +} + +define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { +; SSE2-LABEL: 'splatvar_shift_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; SSE42-LABEL: 'splatvar_shift_v8i64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatvar_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatvar_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatvar_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> poison, i64 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %insert = insertelement <8 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer + %shift = shl <8 x i64> %a, %splat + ret <8 x i64> %shift +} + +define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) { +; SSE2-LABEL: 'splatvar_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer 
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'splatvar_shift_v4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX-LABEL: 'splatvar_shift_v4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; XOP-LABEL: 'splatvar_shift_v4i32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'splatvar_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> poison, i32 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %insert = insertelement <4 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer + %shift = shl <4 x i32> %a, %splat + ret <4 x i32> %shift +} + +define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { +; SSE2-LABEL: 'splatvar_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'splatvar_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatvar_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatvar_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatvar_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> poison, i32 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %insert = insertelement <8 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer + %shift = shl <8 x i32> %a, %splat + ret <8 x i32> %shift +} + +define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { +; SSE2-LABEL: 'splatvar_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'splatvar_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatvar_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; XOPAVX2-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> poison, i32 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %insert = insertelement <16 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer + %shift = shl <16 x i32> %a, %splat + ret <16 x i32> %shift +} + +define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, i16 %b) { +; SSE2-LABEL: 'splatvar_shift_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x 
i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; SSE42-LABEL: 'splatvar_shift_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX-LABEL: 'splatvar_shift_v8i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; XOP-LABEL: 'splatvar_shift_v8i16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX512-LABEL: 'splatvar_shift_v8i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i16> poison, i16 %b, i32 0 +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %insert = insertelement <8 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer + %shift = shl <8 x i16> %a, %splat + ret <8 x i16> %shift +} + +define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { +; SSE2-LABEL: 'splatvar_shift_v16i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; SSE42-LABEL: 'splatvar_shift_v16i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatvar_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 
x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x 
i16> poison, i16 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> poison, i16 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; + %insert = insertelement <16 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer + %shift = shl <16 x i16> %a, %splat + ret <16 x i16> %shift +} + +define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { +; SSE2-LABEL: 'splatvar_shift_v32i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; SSE42-LABEL: 'splatvar_shift_v32i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatvar_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatvar_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatvar_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, 
%splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> poison, i16 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %insert = insertelement <32 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer + %shift = shl <32 x i16> %a, %splat + ret <32 x i16> %shift +} + +define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { +; SSE2-LABEL: 'splatvar_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret 
<16 x i8> %shift +; +; SSE42-LABEL: 'splatvar_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatvar_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'splatvar_shift_v16i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: 
%shift = shl <16 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %insert = insertelement <16 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer + %shift = shl <16 x i8> %a, %splat + ret <16 x i8> %shift +} + +define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { +; SSE2-LABEL: 'splatvar_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'splatvar_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <32 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatvar_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; 
AVX2-LABEL: 'splatvar_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'splatvar_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for 
instruction: %shift = shl <32 x i8> %a, %splat +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %insert = insertelement <32 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer + %shift = shl <32 x i8> %a, %splat + ret <32 x i8> %shift +} + +define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { +; SSE2-LABEL: 'splatvar_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'splatvar_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = shl <64 x i8> %a, %splat +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x 
i8> %shift +; +; AVX1-LABEL: 'splatvar_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatvar_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, %splat +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatvar_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %shift = shl <64 x i8> %a, %splat +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatvar_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = shl <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %insert = insertelement <64 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer + %shift = shl <64 x i8> %a, %splat + ret <64 x i8> %shift +} + +; +; Constant Shifts +; + +define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) { +; SSE-LABEL: 'constant_shift_v2i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX1-LABEL: 'constant_shift_v2i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <2 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX2-LABEL: 'constant_shift_v2i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; XOP-LABEL: 'constant_shift_v2i64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; 
XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; AVX512-LABEL: 'constant_shift_v2i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; +; BTVER2-LABEL: 'constant_shift_v2i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <2 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %shift = shl <2 x i64> %a, + ret <2 x i64> %shift +} + +define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { +; SSE-LABEL: 'constant_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'constant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'constant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'constant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x 
i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'constant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; + %shift = shl <4 x i64> %a, + ret <4 x i64> %shift +} + +define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { +; SSE-LABEL: 'constant_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'constant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'constant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'constant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 'constant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: 
%shift = shl <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %shift = shl <8 x i64> %a, + ret <8 x i64> %shift +} + +define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) { +; SSE2-LABEL: 'constant_shift_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; SSE42-LABEL: 'constant_shift_v4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX1-LABEL: 'constant_shift_v4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX2-LABEL: 'constant_shift_v4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; XOP-LABEL: 'constant_shift_v4i32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; AVX512-LABEL: 'constant_shift_v4i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; +; BTVER2-LABEL: 'constant_shift_v4i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %shift = shl <4 x i32> %a, + ret <4 x i32> %shift +} + +define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { +; 
SSE2-LABEL: 'constant_shift_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; SSE42-LABEL: 'constant_shift_v8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'constant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'constant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'constant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'constant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %shift = shl <8 x i32> %a, + ret <8 x i32> %shift +} + +define <16 x i32> @constant_shift_v16i32(<16 x 
i32> %a) { +; SSE2-LABEL: 'constant_shift_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; SSE42-LABEL: 'constant_shift_v16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'constant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'constant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'constant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; BTVER2-LABEL: 'constant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %shift = shl <16 x i32> %a, + ret <16 x i32> %shift +} + +define 
<8 x i16> @constant_shift_v8i16(<8 x i16> %a) { +; CHECK-LABEL: 'constant_shift_v8i16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %shift = shl <8 x i16> %a, + ret <8 x i16> %shift +} + +define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { +; SSE-LABEL: 'constant_shift_v16i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'constant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'constant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512-LABEL: 'constant_shift_v16i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'constant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; BTVER2-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; + %shift = shl <16 x i16> %a, + ret <16 x i16> %shift +} + +define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { +; SSE-LABEL: 'constant_shift_v32i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'constant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'constant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'constant_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'constant_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'constant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'constant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'constant_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %shift = shl <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'constant_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %shift = shl <32 x i16> %a, + ret <32 x i16> %shift +} + +define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { +; SSE2-LABEL: 'constant_shift_v16i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = shl <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; SSE42-LABEL: 'constant_shift_v16i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'constant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'constant_shift_v16i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'constant_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %shift = shl <16 x i8> %a, + ret <16 x i8> %shift +} + +define <32 x i8> 
@constant_shift_v32i8(<32 x i8> %a) { +; SSE2-LABEL: 'constant_shift_v32i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = shl <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; SSE42-LABEL: 'constant_shift_v32i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'constant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOP-LABEL: 'constant_shift_v32i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'constant_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'constant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %shift = shl <32 x i8> %a, + ret <32 x i8> %shift +} + +define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { +; SSE2-LABEL: 'constant_shift_v64i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %shift = shl <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret <64 x i8> %shift +; +; SSE42-LABEL: 'constant_shift_v64i8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %shift = shl <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'constant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOP-LABEL: 'constant_shift_v64i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'constant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'constant_shift_v64i8' 
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = shl <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %shift = shl <64 x i8> %a, + ret <64 x i8> %shift +} + +; +; Uniform Constant Shifts +; + +define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { +; CHECK-LABEL: 'splatconstant_shift_v2i64' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift +; + %shift = shl <2 x i64> %a, + ret <2 x i64> %shift +} + +define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { +; SSE-LABEL: 'splatconstant_shift_v4i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v4i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v4i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v4i64' +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v4i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift +; + %shift = shl <4 x i64> %a, + ret <4 x i64> %shift +} + +define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { +; SSE-LABEL: 'splatconstant_shift_v8i64' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; +; BTVER2-LABEL: 
'splatconstant_shift_v8i64' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift +; + %shift = shl <8 x i64> %a, + ret <8 x i64> %shift +} + +define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { +; CHECK-LABEL: 'splatconstant_shift_v4i32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift +; + %shift = shl <4 x i32> %a, + ret <4 x i32> %shift +} + +define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { +; SSE-LABEL: 'splatconstant_shift_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v8i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v8i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v8i32' +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v8i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift +; + %shift = shl <8 x i32> %a, + ret <8 x i32> %shift +} + +define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { +; SSE-LABEL: 'splatconstant_shift_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; +; 
BTVER2-LABEL: 'splatconstant_shift_v16i32' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift +; + %shift = shl <16 x i32> %a, + ret <16 x i32> %shift +} + +define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { +; CHECK-LABEL: 'splatconstant_shift_v8i16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; + %shift = shl <8 x i16> %a, + ret <8 x i16> %shift +} + +define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { +; SSE-LABEL: 'splatconstant_shift_v16i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v16i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v16i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i16' +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v16i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift +; + %shift = shl <16 x i16> %a, + ret <16 x i16> %shift +} + +define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { +; SSE-LABEL: 'splatconstant_shift_v32i16' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v32i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret <32 x i16> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v32i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v32i16' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift +; + %shift = shl <32 x i16> %a, + ret <32 x i16> %shift +} + +define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { +; SSE-LABEL: 'splatconstant_shift_v16i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX-LABEL: 'splatconstant_shift_v16i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; XOP-LABEL: 'splatconstant_shift_v16i8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v16i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%shift = shl <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; + %shift = shl <16 x i8> %a, + ret <16 x i8> %shift +} + +define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { +; SSE-LABEL: 'splatconstant_shift_v32i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v32i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v32i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512-LABEL: 'splatconstant_shift_v32i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v32i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; + %shift = shl <32 x i8> %a, + ret <32 x i8> %shift +} + 
+define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { +; SSE-LABEL: 'splatconstant_shift_v64i8' +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX1-LABEL: 'splatconstant_shift_v64i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX2-LABEL: 'splatconstant_shift_v64i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512F-LABEL: 'splatconstant_shift_v64i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512BW-LABEL: 'splatconstant_shift_v64i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; AVX512VL-LABEL: 'splatconstant_shift_v64i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x 
i8> %shift +; +; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; +; BTVER2-LABEL: 'splatconstant_shift_v64i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift +; + %shift = shl <64 x i8> %a, + ret <64 x i8> %shift +} + +; +; Special Cases +; + +; We always emit a single pmullw in the case of v8i16 vector shifts by +; non-uniform constant. + +define <8 x i16> @test1(<8 x i16> %a) { +; CHECK-LABEL: 'test1' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shl +; + %shl = shl <8 x i16> %a, + ret <8 x i16> %shl +} + +define <8 x i16> @test2(<8 x i16> %a) { +; CHECK-LABEL: 'test2' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shl +; + %shl = shl <8 x i16> %a, + ret <8 x i16> %shl +} + +; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction. +; Make sure that the estimated cost is always 2 except for the case where +; we only have SSE2 support. With SSE2, we are forced to special lower the +; v4i32 mul as a 2x shuffle, 2x pmuludq, 2x shuffle. 
+ +define <4 x i32> @test3(<4 x i32> %a) { +; SSE2-LABEL: 'test3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shl = shl <4 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; SSE42-LABEL: 'test3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <4 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; AVX1-LABEL: 'test3' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; AVX2-LABEL: 'test3' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; XOP-LABEL: 'test3' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i32> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; AVX512-LABEL: 'test3' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; BTVER2-LABEL: 'test3' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <4 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; + %shl = shl <4 x i32> %a, + ret <4 x i32> %shl +} + +define <4 x i32> @test4(<4 x i32> %a) { +; SSE2-LABEL: 'test4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shl = shl <4 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; SSE42-LABEL: 'test4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <4 x i32> %a, +; SSE42-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; AVX1-LABEL: 'test4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; AVX2-LABEL: 'test4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; XOP-LABEL: 'test4' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i32> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; AVX512-LABEL: 'test4' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; +; BTVER2-LABEL: 'test4' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <4 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shl +; + %shl = shl <4 x i32> %a, + ret <4 x i32> %shl +} + +; On AVX2 we are able to lower the following shift into a single +; vpsllvq. Therefore, the expected cost is only 1. +; In all other cases, this shift is scalarized as the target does not support +; vpsllv instructions. 
+ +define <2 x i64> @test5(<2 x i64> %a) { +; SSE-LABEL: 'test5' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shl +; +; AVX1-LABEL: 'test5' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <2 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shl +; +; AVX2-LABEL: 'test5' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <2 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shl +; +; XOP-LABEL: 'test5' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shl +; +; AVX512-LABEL: 'test5' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shl +; +; BTVER2-LABEL: 'test5' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <2 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shl +; + %shl = shl <2 x i64> %a, + ret <2 x i64> %shl +} + +; v16i16 and v8i32 shift left by non-uniform constant are lowered into +; vector multiply instructions. With AVX (but not AVX2), the vector multiply +; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert. +; +; With AVX2, instruction vpmullw works with 256bit quantities and +; therefore there is no need to split the resulting vector multiply into +; a sequence of two multiply. +; +; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice +; the cost computed in the case of 'test1'. That is because the backend +; simply emits 2 pmullw with no extract/insert. 
+ +define <16 x i16> @test6(<16 x i16> %a) { +; SSE-LABEL: 'test6' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shl +; +; AVX1-LABEL: 'test6' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shl +; +; AVX2-LABEL: 'test6' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shl +; +; XOPAVX1-LABEL: 'test6' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shl +; +; XOPAVX2-LABEL: 'test6' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shl +; +; AVX512-LABEL: 'test6' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <16 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shl +; +; BTVER2-LABEL: 'test6' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shl +; + %shl = shl <16 x i16> %a, + ret <16 x i16> %shl +} + +; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice +; the cost computed in the case of 'test3'. That is because the multiply +; is type-legalized into two 4i32 vector multiply. 
+ +define <8 x i32> @test7(<8 x i32> %a) { +; SSE2-LABEL: 'test7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shl = shl <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl +; +; SSE42-LABEL: 'test7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl +; +; AVX1-LABEL: 'test7' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl +; +; AVX2-LABEL: 'test7' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl +; +; XOPAVX1-LABEL: 'test7' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl +; +; XOPAVX2-LABEL: 'test7' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl +; +; AVX512-LABEL: 'test7' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl +; +; BTVER2-LABEL: 'test7' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl +; + %shl = shl <8 x i32> %a, + ret <8 x i32> %shl +} + +; On AVX2 we are able to lower the following shift into a single +; vpsllvq. Therefore, the expected cost is only 1. 
+; In all other cases, this shift is scalarized as the target does not support +; vpsllv instructions. + +define <4 x i64> @test8(<4 x i64> %a) { +; SSE-LABEL: 'test8' +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shl = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl +; +; AVX1-LABEL: 'test8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shl = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl +; +; AVX2-LABEL: 'test8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl +; +; XOPAVX1-LABEL: 'test8' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl +; +; XOPAVX2-LABEL: 'test8' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl +; +; AVX512-LABEL: 'test8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl +; +; BTVER2-LABEL: 'test8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shl = shl <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl +; + %shl = shl <4 x i64> %a, + ret <4 x i64> %shl +} + +; Same as 'test6', with the difference that the cost is double. 
+ +define <32 x i16> @test9(<32 x i16> %a) { +; SSE-LABEL: 'test9' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shl +; +; AVX1-LABEL: 'test9' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shl = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shl +; +; AVX2-LABEL: 'test9' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shl +; +; XOPAVX1-LABEL: 'test9' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shl = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shl +; +; XOPAVX2-LABEL: 'test9' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shl +; +; AVX512F-LABEL: 'test9' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shl +; +; AVX512BW-LABEL: 'test9' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shl +; +; AVX512VL-LABEL: 'test9' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shl +; +; AVX512BWVL-LABEL: 'test9' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret <32 x i16> %shl +; +; BTVER2-LABEL: 'test9' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shl = shl <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shl +; + %shl = shl <32 x i16> %a, + ret <32 x i16> %shl +} + +; Same as 'test7', except that now the cost is double. + +define <16 x i32> @test10(<16 x i32> %a) { +; SSE2-LABEL: 'test10' +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shl = shl <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl +; +; SSE42-LABEL: 'test10' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shl = shl <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl +; +; AVX1-LABEL: 'test10' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shl = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl +; +; AVX2-LABEL: 'test10' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl +; +; XOPAVX1-LABEL: 'test10' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shl = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl +; +; XOPAVX2-LABEL: 'test10' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl +; +; AVX512-LABEL: 'test10' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl +; +; 
BTVER2-LABEL: 'test10' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shl = shl <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl +; + %shl = shl <16 x i32> %a, + ret <16 x i32> %shl +} + +; On AVX2 we are able to lower the following shift into a sequence of +; two vpsllvq instructions. Therefore, the expected cost is only 2. +; In all other cases, this shift is scalarized as we don't have vpsllv +; instructions. + +define <8 x i64> @test11(<8 x i64> %a) { +; SSE-LABEL: 'test11' +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shl = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl +; +; AVX1-LABEL: 'test11' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shl = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl +; +; AVX2-LABEL: 'test11' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl +; +; XOPAVX1-LABEL: 'test11' +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shl = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl +; +; XOPAVX2-LABEL: 'test11' +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl +; +; AVX512-LABEL: 'test11' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shl = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl +; +; BTVER2-LABEL: 'test11' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shl = shl <8 x i64> %a, +; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl +; + %shl = shl <8 x i64> %a, + ret <8 x i64> %shl +} diff --git a/llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll b/llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll new file mode 100644 index 0000000..ce735a2 --- /dev/null +++ b/llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll @@ -0,0 +1,136 @@ +; RUN: opt -S -demanded-bits -analyze -enable-new-pm=0 < %s | FileCheck %s +; RUN: opt -S -disable-output -passes="print" < %s 2>&1 | FileCheck %s + +; CHECK-DAG: DemandedBits: 0xff00 for %x = or <2 x i32> %a, zeroinitializer +; CHECK-DAG: DemandedBits: 0xff00 for %y = or <2 x i32> %b, zeroinitializer +; CHECK-DAG: DemandedBits: 0xff00 for %z = or <2 x i32> %x, %y +; CHECK-DAG: DemandedBits: 0xff for %u = lshr <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %r = trunc <2 x i32> %u to <2 x i8> +define <2 x i8> @test_basic(<2 x i32> %a, <2 x i32> %b) { + %x = or <2 x i32> %a, zeroinitializer + %y = or <2 x i32> %b, zeroinitializer + %z = or <2 x i32> %x, %y + %u = lshr <2 x i32> %z, + %r = trunc <2 x i32> %u to <2 x i8> + ret <2 x i8> %r +} + +; Vector-specific instructions + +; CHECK-DAG: DemandedBits: 0xff for %x = or <2 x i32> %a, zeroinitializer +; CHECK-DAG: DemandedBits: 0xf0 for %z = extractelement <2 x i32> %x, i32 1 +; CHECK-DAG: DemandedBits: 0xf for %y = extractelement <2 x i32> %x, i32 0 +; CHECK-DAG: DemandedBits: 0xffffffff for %u = and i32 %y, 15 +; CHECK-DAG: DemandedBits: 0xffffffff for %v = and i32 %z, 240 +; CHECK-DAG: DemandedBits: 0xffffffff for %r = or i32 %u, %v +define i32 @test_extractelement(<2 x i32> %a) { + %x = or <2 x i32> %a, zeroinitializer + %y = extractelement <2 x i32> %x, i32 0 + %z = extractelement <2 x i32> %x, i32 1 + %u = and i32 %y, 15 + %v = and i32 %z, 240 + %r = or i32 %u, %v + ret i32 %r +} + +; CHECK-DAG: DemandedBits: 0xff for %x = or i32 %a, 0 +; CHECK-DAG: DemandedBits: 0xff for %y = or i32 %b, 0 +; CHECK-DAG: 
DemandedBits: 0xff for %z = insertelement <2 x i32> poison, i32 %x, i32 0 +; CHECK-DAG: DemandedBits: 0xff for %u = insertelement <2 x i32> %z, i32 %y, i32 1 +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %u, +define <2 x i32> @test_insertelement(i32 %a, i32 %b) { + %x = or i32 %a, 0 + %y = or i32 %b, 0 + %z = insertelement <2 x i32> poison, i32 %x, i32 0 + %u = insertelement <2 x i32> %z, i32 %y, i32 1 + %r = and <2 x i32> %u, + ret <2 x i32> %r +} + +; CHECK-DAG: DemandedBits: 0xff for %x = or <2 x i32> %a, zeroinitializer +; CHECK-DAG: DemandedBits: 0xff for %y = or <2 x i32> %b, zeroinitializer +; CHECK-DAG: DemandedBits: 0xff for %z = shufflevector <2 x i32> %x, <2 x i32> %y, <3 x i32> +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <3 x i32> %z, +define <3 x i32> @test_shufflevector(<2 x i32> %a, <2 x i32> %b) { + %x = or <2 x i32> %a, zeroinitializer + %y = or <2 x i32> %b, zeroinitializer + %z = shufflevector <2 x i32> %x, <2 x i32> %y, <3 x i32> + %r = and <3 x i32> %z, + ret <3 x i32> %r +} + +; Shifts with splat shift amounts + +; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer +; CHECK-DAG: DemandedBits: 0xf0 for %y = shl <2 x i32> %x, +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +define <2 x i32> @test_shl(<2 x i32> %a) { + %x = or <2 x i32> %a, zeroinitializer + %y = shl <2 x i32> %x, + %r = and <2 x i32> %y, + ret <2 x i32> %r +} + +; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer +; CHECK-DAG: DemandedBits: 0xf0 for %y = ashr <2 x i32> %x, +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +define <2 x i32> @test_ashr(<2 x i32> %a) { + %x = or <2 x i32> %a, zeroinitializer + %y = ashr <2 x i32> %x, + %r = and <2 x i32> %y, + ret <2 x i32> %r +} + +; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer +; CHECK-DAG: DemandedBits: 0xf0 for %y = lshr <2 x i32> %x, +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, 
+define <2 x i32> @test_lshr(<2 x i32> %a) { + %x = or <2 x i32> %a, zeroinitializer + %y = lshr <2 x i32> %x, + %r = and <2 x i32> %y, + ret <2 x i32> %r +} + +declare <2 x i32> @llvm.fshl.i32(<2 x i32>, <2 x i32>, <2 x i32>) +declare <2 x i32> @llvm.fshr.i32(<2 x i32>, <2 x i32>, <2 x i32>) + +; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer +; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer +; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, +define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) { + %x = or <2 x i32> %a, zeroinitializer + %y = or <2 x i32> %b, zeroinitializer + %z = call <2 x i32> @llvm.fshl.i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) + %r = and <2 x i32> %z, + ret <2 x i32> %r +} + +; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer +; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer +; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, +define <2 x i32> @test_fshr(<2 x i32> %a, <2 x i32> %b) { + %x = or <2 x i32> %a, zeroinitializer + %y = or <2 x i32> %b, zeroinitializer + %z = call <2 x i32> @llvm.fshr.i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) + %r = and <2 x i32> %z, + ret <2 x i32> %r +} + +; FP / Int conversion. These have different input / output types. 
+ +; CHECK-DAG: DemandedBits: 0xffffffff for %x = or <2 x i32> %a, zeroinitializer +define <2 x float> @test_uitofp(<2 x i32> %a) { + %x = or <2 x i32> %a, zeroinitializer + %r = uitofp <2 x i32> %x to <2 x float> + ret <2 x float> %r +} + +; CHECK-DAG: DemandedBits: 0xffffffff for %y = fptoui <2 x float> %x to <2 x i32> +define <2 x i32> @test_fptoui(<2 x float> %a) { + %x = fadd <2 x float> %a, + %y = fptoui <2 x float> %x to <2 x i32> + %r = and <2 x i32> %y, + ret <2 x i32> %y +} diff --git a/llvm/test/DebugInfo/X86/stack-value-piece-inseltpoison.ll b/llvm/test/DebugInfo/X86/stack-value-piece-inseltpoison.ll new file mode 100644 index 0000000..69362f7 --- /dev/null +++ b/llvm/test/DebugInfo/X86/stack-value-piece-inseltpoison.ll @@ -0,0 +1,114 @@ +; RUN: llc %s -filetype=obj -o - | llvm-dwarfdump -debug-info -debug-loc - | FileCheck %s +; Test that DW_OP_piece is emitted for constants. +; +; // Generated from: +; typedef struct { int a, b; } I; +; I i(int i) { +; I r = {i, 0}; +; return r; +; } +; +; typedef struct { float a, b; } F; +; F f(float f) { +; F r = {f, 0}; +; return r; +; } + +; CHECK: .debug_info contents: +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("i") +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_location ([[I:0x[0-9a-f]+]] +; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_reg5 RDI, DW_OP_piece 0x4, DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4) +; CHECK-NEXT: DW_AT_name ("r") +; +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("f") +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_location ([[F:0x[0-9a-f]+]] +; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_reg17 XMM0, DW_OP_piece 0x4, {{(DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4|DW_OP_implicit_value 0x4 0x00 0x00 0x00 0x00)}} +; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_piece 0x4, {{(DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4|DW_OP_implicit_value 0x4 0x00 0x00 0x00 0x00)}}) +; CHECK-NEXT: DW_AT_name ("r") +; +; CHECK: .debug_loc contents: +; CHECK: [[I]]: +; CHECK-NEXT: ({{.*}}, {{.*}}): DW_OP_reg5 RDI, 
DW_OP_piece 0x4, DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4 +; CHECK: [[F]]: +; CHECK-NEXT: ({{.*}}, {{.*}}): DW_OP_reg17 XMM0, DW_OP_piece 0x4, {{DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4|DW_OP_implicit_value 0x4 0x00 0x00 0x00 0x00}} + +source_filename = "stack-value-piece.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.12.0" + +%struct.I = type { i32, i32 } +%struct.F = type { float, float } + +; Function Attrs: nounwind readnone ssp uwtable +define i64 @i(i32 %i) local_unnamed_addr #0 !dbg !7 { +entry: + tail call void @llvm.dbg.value(metadata i32 %i, metadata !18, metadata !22), !dbg !21 + tail call void @llvm.dbg.value(metadata i32 0, metadata !18, metadata !23), !dbg !21 + %retval.sroa.0.0.insert.ext = zext i32 %i to i64, !dbg !24 + ret i64 %retval.sroa.0.0.insert.ext, !dbg !24 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: nounwind readnone ssp uwtable +define <2 x float> @f(float %f) local_unnamed_addr #0 !dbg !25 { +entry: + tail call void @llvm.dbg.value(metadata float %f, metadata !36, metadata !22), !dbg !38 + tail call void @llvm.dbg.value(metadata float 0.000000e+00, metadata !36, metadata !23), !dbg !38 + %retval.sroa.0.0.vec.insert = insertelement <2 x float> poison, float %f, i32 0, !dbg !39 + %retval.sroa.0.4.vec.insert = insertelement <2 x float> %retval.sroa.0.0.vec.insert, float 0.000000e+00, i32 1, !dbg !39 + ret <2 x float> %retval.sroa.0.4.vec.insert, !dbg !40 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #0 = { nounwind readnone ssp uwtable } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 4.0.0 (trunk 285655) (llvm/trunk 285654)", isOptimized: true, runtimeVersion: 
0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "stack-value-piece.c", directory: "/") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"PIC Level", i32 2} +!6 = !{!"clang version 4.0.0 (trunk 285655) (llvm/trunk 285654)"} +!7 = distinct !DISubprogram(name: "i", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !14} +!10 = !DIDerivedType(tag: DW_TAG_typedef, name: "I", file: !1, line: 1, baseType: !11) +!11 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !1, line: 1, size: 64, elements: !12) +!12 = !{!13, !15} +!13 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !11, file: !1, line: 1, baseType: !14, size: 32) +!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !11, file: !1, line: 1, baseType: !14, size: 32, offset: 32) +!18 = !DILocalVariable(name: "r", scope: !7, file: !1, line: 3, type: !10) +!19 = !DIExpression() +!20 = !DILocation(line: 2, column: 9, scope: !7) +!21 = !DILocation(line: 3, column: 5, scope: !7) +!22 = !DIExpression(DW_OP_LLVM_fragment, 0, 32) +!23 = !DIExpression(DW_OP_LLVM_fragment, 32, 32) +!24 = !DILocation(line: 5, column: 1, scope: !7) +!25 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 8, type: !26, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!26 = !DISubroutineType(types: !27) +!27 = !{!28, !32} +!28 = !DIDerivedType(tag: DW_TAG_typedef, name: "F", file: !1, line: 7, baseType: !29) +!29 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !1, line: 7, size: 64, elements: !30) +!30 = !{!31, !33} +!31 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !29, file: !1, line: 7, baseType: !32, size: 32) +!32 = !DIBasicType(name: "float", size: 
32, encoding: DW_ATE_float) +!33 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !29, file: !1, line: 7, baseType: !32, size: 32, offset: 32) +!36 = !DILocalVariable(name: "r", scope: !25, file: !1, line: 9, type: !28) +!37 = !DILocation(line: 8, column: 11, scope: !25) +!38 = !DILocation(line: 9, column: 5, scope: !25) +!39 = !DILocation(line: 10, column: 10, scope: !25) +!40 = !DILocation(line: 11, column: 1, scope: !25) diff --git a/llvm/test/Other/scalable-vectors-core-ir.ll b/llvm/test/Other/scalable-vectors-core-ir.ll index 60cbab3..f04184a 100644 --- a/llvm/test/Other/scalable-vectors-core-ir.ll +++ b/llvm/test/Other/scalable-vectors-core-ir.ll @@ -200,6 +200,16 @@ define @shufflevector(half %val) { ret %r } +define @shufflevector2(half %val) { +; CHECK-LABEL: @shufflevector2 +; CHECK: %insvec = insertelement poison, half %val, i32 0 +; CHECK-NEXT: %r = shufflevector %insvec, undef, zeroinitializer +; CHECK-NEXT: ret %r + %insvec = insertelement poison, half %val, i32 0 + %r = shufflevector %insvec, undef, zeroinitializer + ret %r +} + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Memory Access and Addressing Operations ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -390,4 +400,4 @@ define @call( %val) { ; CHECK-NEXT: ret %r %r = call @callee( %val) ret %r -} \ No newline at end of file +} -- 2.7.4