From 2dfe76e989877d3992bf52971f27ad4ae5064a6d Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Wed, 23 Nov 2022 09:40:50 +0800 Subject: [PATCH] [CostModel][X86] Add CostKinds test coverage for shufflevector instruction Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D138485 --- .../CostModel/X86/shuffle-broadcast-codesize.ll | 376 +++++ .../X86/shuffle-broadcast-fp16-codesize.ll | 19 + .../X86/shuffle-broadcast-fp16-latency.ll | 19 + .../X86/shuffle-broadcast-fp16-sizelatency.ll | 19 + .../CostModel/X86/shuffle-broadcast-latency.ll | 376 +++++ .../CostModel/X86/shuffle-broadcast-sizelatency.ll | 376 +++++ .../X86/shuffle-extract_subvector-codesize.ll | 1646 ++++++++++++++++++++ .../X86/shuffle-extract_subvector-latency.ll | 1646 ++++++++++++++++++++ .../X86/shuffle-extract_subvector-sizelatency.ll | 1646 ++++++++++++++++++++ .../X86/shuffle-insert_subvector-codesize.ll | 1220 +++++++++++++++ .../X86/shuffle-insert_subvector-latency.ll | 1220 +++++++++++++++ .../X86/shuffle-insert_subvector-sizelatency.ll | 1220 +++++++++++++++ .../CostModel/X86/shuffle-load-codesize.ll | 473 ++++++ .../Analysis/CostModel/X86/shuffle-load-latency.ll | 473 ++++++ .../CostModel/X86/shuffle-load-sizelatency.ll | 473 ++++++ .../CostModel/X86/shuffle-non-pow-2-codesize.ll | 31 + .../CostModel/X86/shuffle-non-pow-2-latency.ll | 31 + .../CostModel/X86/shuffle-non-pow-2-sizelatency.ll | 31 + .../X86/shuffle-replication-i1-codesize.ll | 1255 +++++++++++++++ .../X86/shuffle-replication-i1-latency.ll | 1255 +++++++++++++++ .../X86/shuffle-replication-i1-sizelatency.ll | 1255 +++++++++++++++ .../X86/shuffle-replication-i16-codesize.ll | 789 ++++++++++ .../X86/shuffle-replication-i16-latency.ll | 789 ++++++++++ .../X86/shuffle-replication-i16-sizelatency.ll | 789 ++++++++++ .../X86/shuffle-replication-i32-codesize.ll | 521 +++++++ .../X86/shuffle-replication-i32-latency.ll | 521 +++++++ .../X86/shuffle-replication-i32-sizelatency.ll | 521 +++++++ 
.../X86/shuffle-replication-i64-codesize.ll | 458 ++++++ .../X86/shuffle-replication-i64-latency.ll | 458 ++++++ .../X86/shuffle-replication-i64-sizelatency.ll | 458 ++++++ .../X86/shuffle-replication-i8-codesize.ll | 789 ++++++++++ .../X86/shuffle-replication-i8-latency.ll | 789 ++++++++++ .../X86/shuffle-replication-i8-sizelatency.ll | 789 ++++++++++ .../CostModel/X86/shuffle-reverse-codesize.ll | 346 ++++ .../CostModel/X86/shuffle-reverse-fp16-codesize.ll | 19 + .../CostModel/X86/shuffle-reverse-fp16-latency.ll | 19 + .../X86/shuffle-reverse-fp16-sizelatency.ll | 19 + .../CostModel/X86/shuffle-reverse-latency.ll | 346 ++++ .../CostModel/X86/shuffle-reverse-sizelatency.ll | 346 ++++ .../CostModel/X86/shuffle-select-codesize.ll | 350 +++++ .../CostModel/X86/shuffle-select-latency.ll | 350 +++++ .../CostModel/X86/shuffle-select-sizelatency.ll | 350 +++++ .../CostModel/X86/shuffle-single-src-codesize.ll | 389 +++++ .../X86/shuffle-single-src-fp16-codesize.ll | 17 + .../X86/shuffle-single-src-fp16-latency.ll | 17 + .../X86/shuffle-single-src-fp16-sizelatency.ll | 17 + .../CostModel/X86/shuffle-single-src-latency.ll | 389 +++++ .../X86/shuffle-single-src-sizelatency.ll | 389 +++++ .../CostModel/X86/shuffle-splice-codesize.ll | 323 ++++ .../CostModel/X86/shuffle-splice-latency.ll | 323 ++++ .../CostModel/X86/shuffle-splice-sizelatency.ll | 323 ++++ .../CostModel/X86/shuffle-transpose-codesize.ll | 295 ++++ .../CostModel/X86/shuffle-transpose-latency.ll | 295 ++++ .../CostModel/X86/shuffle-transpose-sizelatency.ll | 295 ++++ .../CostModel/X86/shuffle-two-src-codesize.ll | 370 +++++ .../CostModel/X86/shuffle-two-src-fp16-codesize.ll | 17 + .../CostModel/X86/shuffle-two-src-fp16-latency.ll | 17 + .../X86/shuffle-two-src-fp16-sizelatency.ll | 17 + .../CostModel/X86/shuffle-two-src-latency.ll | 370 +++++ .../CostModel/X86/shuffle-two-src-sizelatency.ll | 370 +++++ 60 files changed, 29109 insertions(+) create mode 100644 
llvm/test/Analysis/CostModel/X86/shuffle-broadcast-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-broadcast-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-broadcast-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-load-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-load-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-load-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-latency.ll create mode 100644 
llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-reverse-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-reverse-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-reverse-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-select-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-select-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-select-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-single-src-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-sizelatency.ll create mode 100644 
llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-single-src-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-splice-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-splice-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-splice-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-transpose-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-transpose-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-transpose-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-two-src-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-sizelatency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-two-src-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-two-src-sizelatency.ll diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-codesize.ll new file mode 100644 index 0000000..c245485 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-codesize.ll @@ -0,0 +1,376 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; 
RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 + +; +; Verify the cost model for broadcast shuffles. 
+; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = 
shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; Zero mask splats element 0 of each source, giving f64 broadcasts at 128, 256 and 512 bits. + %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer + %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer + %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; Zero mask splats element 0 of each source, giving i64 broadcasts at 128, 256 and 512 bits. + %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer + %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer + %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> 
undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; Zero mask splats element 0 of each source, giving f32 broadcasts at 64, 128, 256 and 512 bits. + %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer + %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer + %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer + %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> 
zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret void +; Zero mask splats element 0 of each source, giving i32 broadcasts at 64, 128, 256 and 512 bits. + %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer + %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer + %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer + %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <32 x 
i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; Zero mask splats element 0 of each source, giving i16 broadcasts at 32, 64, 128, 256 and 512 bits. + %V32 = shufflevector <2 x i16> %src32, <2 x i16> 
undef, <2 x i32> zeroinitializer + %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer + %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer + %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer + %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 
x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer + %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer + %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer + %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer + %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer + %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer + ret void +} + +define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> %src16, <32 x i1> %src32, <64 x i1> %src64) { +; SSE2-LABEL: 'test_vXi1' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi1' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi1' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi1' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi1' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x 
i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi1' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer + %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer + %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer + %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer + %V32 = shufflevector <32 
x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer + %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer + ret void +} + +; +; Tests the cost model for broadcast shuffles of second operand. +; + +define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE-LABEL: 'test_upper_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_upper_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_upper_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2> +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_upper_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 2, i32 2> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 4, i32 4, i32 4, i32 4> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-codesize.ll new file mode 100644 index 0000000..88f8a9d --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-codesize.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512fp16 | 
FileCheck %s + +define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128, <16 x half> %src256, <32 x half> %src512) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer + %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer + %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer + %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer + %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-latency.ll new file mode 100644 index 0000000..e75715c --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-latency.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency 
-mattr=+avx512fp16 | FileCheck %s + +define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128, <16 x half> %src256, <32 x half> %src512) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer + %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer + %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer + %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer + %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-sizelatency.ll new file mode 100644 index 0000000..11207ce --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-fp16-sizelatency.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 
-disable-output -cost-kind=size-latency -mattr=+avx512fp16 | FileCheck %s + +define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128, <16 x half> %src256, <32 x half> %src512) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> zeroinitializer + %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> zeroinitializer + %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> zeroinitializer + %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> zeroinitializer + %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> zeroinitializer + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-latency.ll new file mode 100644 index 0000000..7a5e91b --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-latency.ll @@ -0,0 +1,376 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu 
-passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 + +; +; Verify the cost model for broadcast shuffles. 
+; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = 
shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer + %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer + %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer + %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer + %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> 
undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer + %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer + %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer + %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> 
zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer + %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer + %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer + %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <32 x 
i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> 
undef, <2 x i32> zeroinitializer + %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer + %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer + %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer + %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 
x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer + %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer + %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer + %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer + %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer + %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer + ret void +} + +define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> %src16, <32 x i1> %src32, <64 x i1> %src64) { +; SSE2-LABEL: 'test_vXi1' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi1' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi1' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi1' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi1' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x 
i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi1' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer + %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer + %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer + %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer + %V32 = shufflevector <32 
x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer + %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer + ret void +} + +; +; Tests the cost model for broadcast shuffles of second operand. +; + +define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE-LABEL: 'test_upper_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_upper_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_upper_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_upper_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-sizelatency.ll new file mode 100644 index 0000000..34754f8 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-broadcast-sizelatency.ll @@ -0,0 +1,376 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s 
-check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 + +; +; Verify the cost model for broadcast shuffles. 
+; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = 
shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer + %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer + %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer + %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer + %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> 
undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer + %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer + %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer + %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> 
zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer + %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer + %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer + %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <32 x 
i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> 
undef, <2 x i32> zeroinitializer + %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> zeroinitializer + %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer + %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer + %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 
x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> zeroinitializer + %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> zeroinitializer + %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> zeroinitializer + %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer + %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer + %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer + ret void +} + +define void @test_vXi1(<2 x i1> %src2, <4 x i1> %src4, <8 x i1> %src8, <16 x i1> %src16, <32 x i1> %src32, <64 x i1> %src64) { +; SSE2-LABEL: 'test_vXi1' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi1' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi1' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi1' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi1' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = shufflevector <8 x 
i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi1' +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32 = shufflevector <32 x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V2 = shufflevector <2 x i1> %src2, <2 x i1> undef, <2 x i32> zeroinitializer + %V4 = shufflevector <4 x i1> %src4, <4 x i1> undef, <4 x i32> zeroinitializer + %V8 = shufflevector <8 x i1> %src8, <8 x i1> undef, <8 x i32> zeroinitializer + %V16 = shufflevector <16 x i1> %src16, <16 x i1> undef, <16 x i32> zeroinitializer + %V32 = shufflevector <32 
x i1> %src32, <32 x i1> undef, <32 x i32> zeroinitializer + %V64 = shufflevector <64 x i1> %src64, <64 x i1> undef, <64 x i32> zeroinitializer + ret void +} + +; +; Tests the cost model for broadcast shuffles of second operand. +; + +define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE-LABEL: 'test_upper_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_upper_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_upper_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_upper_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll new file mode 100644 index 0000000..2676f7a --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll @@ -0,0 +1,1646 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | 
FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX + +; +; Verify the cost model for extract_subvector style shuffles. 
+; + +define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; 
AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = 
shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> + %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> + %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> + %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> + %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> + %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> + ret void +} + +define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; + %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> + %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> + %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> + %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> + %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> + ret void +} + +define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; 
SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> 
%src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> + %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> + %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> + %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> + %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + 
%V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> + %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> + ret void +} + +define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, 
<16 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> 
undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, 
<2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector 
<16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x 
i16> %src512, <32 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi16' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = 
shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 
x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> 
%src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x 
i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = 
shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = 
shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLM-LABEL: 'test_vXi16' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost 
Model: Found an estimated cost of 20 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 
x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; +; GLM-LABEL: 'test_vXi16' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = 
shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> 
%src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> + %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> + %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> + %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> + %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_6789 = 
shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> + %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> + %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_08_09_0A_0B = shufflevector 
<32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> + %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> + ret void +} + +define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x 
i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 
x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> 
%src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x 
i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> 
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x 
i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x 
i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> 
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi8' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = 
shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = 
shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; 
AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x 
i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x 
i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 
x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, 
<2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 
x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, 
<64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLM-LABEL: 'test_vXi8' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> 
undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> 
%src128, <16 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x 
i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> 
undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x 
i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; GLM-LABEL: 'test_vXi8' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 
x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 
x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 
x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; 
GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> + %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> + %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> + %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> + %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x 
i32> + %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_00_01_02_03_04_05_06_07 = 
shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> + %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> + %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_24_25 = shufflevector <64 x i8> %src512, 
<64 x i8> undef, <2 x i32> + %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> 
+ %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> + %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll 
b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll new file mode 100644 index 0000000..5c5ee39 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll @@ -0,0 +1,1646 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu 
-passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX + +; +; Verify the cost model for extract_subvector style shuffles. +; + +define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x
double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> + %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> + %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> + %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> + %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> + %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> + ret void +} + +define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; 
SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> + %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> + %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> + %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> + %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> + ret void +} + +define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; SSE-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> 
undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; 
AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x 
i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = 
shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> + %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> + %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> + %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> + %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> 
undef, <2 x i32> + %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> + %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> + ret void +} + +define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> 
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector 
<16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector 
<16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = 
shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, 
<32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: 
Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> 
%src512, <32 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi16' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 
x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost 
of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> 
undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLM-LABEL: 'test_vXi16' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x 
i16> %src64, <4 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector 
<16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 
x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; GLM-LABEL: 'test_vXi16' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, 
<32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost 
of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> + %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> + %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> + %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> + %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x 
i32> + %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> + %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> + %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_04_05_06_07 = 
shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> + %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> + ret void +} + +define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector 
<32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x 
i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> 
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> 
%src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F 
= shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> 
undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 
= shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, 
<64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi8' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 
x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> 
undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> 
%src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> 
undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x 
i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> 
undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLM-LABEL: 'test_vXi8' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = 
shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x 
i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 
x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; GLM-LABEL: 'test_vXi8' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x 
i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; 
GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, 
<64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> + %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> + %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> + %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> + 
%V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_1C_1D_1E_1F = shufflevector <32 x i8> 
%src256, <32 x i8> undef, <4 x i32> + %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> + %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> + %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_22_23 = shufflevector <64 x i8> %src512, <64 x 
i8> undef, <2 x i32> + %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + 
%V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> + %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> + ret void +} diff --git 
a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll new file mode 100644 index 0000000..8fe74bd --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll @@ -0,0 +1,1646 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output
-cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX + +; +; Verify the cost model for extract_subvector style shuffles. +; + +define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL:
'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 
= shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> + %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> + %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> + %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> + %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> + %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> + %V512_567u = shufflevector <8 x double> %src512, <8 x 
double> undef, <4 x i32> + ret void +} + +define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 
16 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> + %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> + %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> + %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> + %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> + %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> + ret void +} + +define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; 
SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_EF = shufflevector 
<16 x i32> %src512, <16 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> + %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> undef, <2 x i32> + %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> undef, <2 x i32> + %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> + %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> undef, <4 x i32> + %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> 
+ %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> undef, <2 x i32> + %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <4 x i32> + %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> + %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> undef, <8 x i32> + ret void +} + +define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x 
i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x 
i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> 
undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x 
i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi16' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector 
<32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x 
i16> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x 
i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F 
= shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLM-LABEL: 'test_vXi16' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> 
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x 
i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = 
shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = 
shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; GLM-LABEL: 'test_vXi16' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector 
<32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = 
shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> + %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> + %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> + %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> + %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> + %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> + %V256_EF = shufflevector <16 x 
i16> %src256, <16 x i16> undef, <2 x i32> + %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> + %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> + %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> + %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> + %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + 
%V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> + %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> + %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> + ret void +} + +define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 
for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x 
i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> 
undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> 
undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x 
i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x 
i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> 
%src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = 
shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi8' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x 
i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; 
AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, 
<64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector 
<64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, 
<64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> 
%src128, <16 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 
x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector 
<64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLM-LABEL: 'test_vXi8' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> 
undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 
x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 
x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; GLM-LABEL: 'test_vXi8' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> 
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> + %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> + %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> + %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> + %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> + %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> 
undef, <8 x i32> + %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> + %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> + %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_18_19_1A_1B = 
shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> + %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> + %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> + %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> + %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_20_21 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> + %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x 
i8> undef, <4 x i32> + %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> + %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> + %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> + 
%V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll new file mode 100644 index 0000000..1202666 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll @@ -0,0 +1,1220 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VMBI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu 
-passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for insert_subvector style shuffles. +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8
x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; + %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> + %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> + %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> + + %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> + %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> + %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> + %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> + %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> + %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> + %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> + %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> 
%src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> + %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> + %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> + + %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> + %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> + %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> + %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for 
instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: 
%V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for 
instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = 
shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = 
shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = 
shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 
= shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> + %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> + %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> + %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> + %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> + %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> + + %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> + %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> + + %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> + %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> + + %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_4567 = 
shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x 
i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, 
<16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = 
shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x 
i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = 
shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost 
of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> 
%src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, 
<16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> + %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> + %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> + %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> + %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> + %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> + + %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> + %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> + + %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> + %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> + + %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_67 = 
shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> 
%src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> 
%src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> 
%src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_45 = shufflevector <16 x i16> 
%src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x 
i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector 
<8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> 
%src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, 
<16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = 
shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x 
i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, 
<16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VMBI-LABEL: 'test_vXi16' +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, 
<8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; 
AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> 
%src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> + %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> + %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> + %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> + %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> + %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> + %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> + %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> + %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> + %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> + + %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> + %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> + + %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x 
i32> + %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + + %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> 
%src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = 
shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> 
%src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = 
shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = 
shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi8' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x 
i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x 
i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost 
of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x 
i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 
3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> 
%src16, <2 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VMBI-LABEL: 'test_vXi8' +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x 
i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> + %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> + %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> + %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> + %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> + %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> + %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> + %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> + %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> + %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> + %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> + %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> + %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> + %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> + %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> + + %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> + %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> + + %V64_01 = shufflevector <8 x i8> %src64, 
<8 x i8> %src32_64, <8 x i32> + %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + + %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> + %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> + + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll new file mode 100644 index 0000000..ff69e97 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll @@ -0,0 +1,1220 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 
2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VMBI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for insert_subvector style shuffles. 
+; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> 
%src128, <2 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> + %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> + %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> + + %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> + %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> + %V512_01 = shufflevector <8 x double> 
%src512, <8 x double> %src128_512, <8 x i32> + %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> + %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> + %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> + %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> + %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> + %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> + %src256_512 = 
shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> + + %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> + %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> + %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> + %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector 
<4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = 
shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 
x float> %src128, <4 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector 
<16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = 
shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector 
<16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_67 = shufflevector <8 x float> 
%src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = 
shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> 
%src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 
x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, 
<16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, 
<16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> + %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> + %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> + %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> + %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> + %src256_512 = shufflevector <8 x float> 
%src256, <8 x float> undef, <16 x i32> + + %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> + %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> + + %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> + %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> + + %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> 
%src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> 
%src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x 
i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: 
%V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x 
i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated 
cost of 24 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> 
%src128, <4 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: 
Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x 
i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> 
%src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> + %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> + %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> + %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> + %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> + %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> + + %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> + %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> + + %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> + %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> + + %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_4567 = shufflevector <16 x i32> %src512, 
<16 x i32> %src128_512, <16 x i32> + %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> 
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for 
instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 
x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost 
of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 
for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector 
<4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: 
%V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, 
<16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: 
Found an estimated cost of 7 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> 
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = 
shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF 
= shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VMBI-LABEL: 'test_vXi16' +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = 
shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = 
shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> + %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> + %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> + %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> + %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> + %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> + %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> + %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> + %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> + %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> + + %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> + %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> + + %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + + %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> 
%src64_256, <16 x i32> + %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: 
%V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi8' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = 
shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = 
shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = 
shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x 
i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> 
%src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> 
%src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VMBI-LABEL: 'test_vXi8' +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = 
shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> + %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> + %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> + %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> + %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> + %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> + %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> + %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> + %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> + %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> + %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> + %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> + %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> + %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> + %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> + + %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> + %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> + + %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + + %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_23 = shufflevector <16 x 
i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> + %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> + + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll new file mode 100644 index 0000000..769a9e3 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll @@ -0,0 +1,1220 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 
2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VMBI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for insert_subvector style shuffles. 
+; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> 
%src128, <2 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> + %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> + %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> + + %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> + %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> + %V512_01 = shufflevector <8 x double> 
%src512, <8 x double> %src128_512, <8 x i32> + %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> + %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> + %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> + %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> + %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> + %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> + %src256_512 = 
shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> + + %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> + %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> + %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> + %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> + %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector 
<4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = 
shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 
x float> %src128, <4 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector 
<16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = 
shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector 
<16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_67 = shufflevector <8 x float> 
%src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = 
shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> 
%src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 
x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, 
<16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, 
<16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> + %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> + %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> + %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> + %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> + %src256_512 = shufflevector <8 x float> 
%src256, <8 x float> undef, <16 x i32> + + %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> + %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> + + %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> + %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> + %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> + + %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> + %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> 
%src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> 
%src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x 
i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: 
%V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x 
i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated 
cost of 24 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> 
%src128, <4 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: 
Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x 
i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> 
%src512, <16 x i32> %src128_512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> + %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> + %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> + %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> + %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> + %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> + + %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> + %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> + + %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> + %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> + %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> + + %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> + %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_4567 = shufflevector <16 x i32> %src512, 
<16 x i32> %src128_512, <16 x i32> + %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> 
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for 
instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 
x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost 
of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 
for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector 
<4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: 
%V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, 
<16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: 
Found an estimated cost of 7 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> 
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = 
shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF 
= shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VMBI-LABEL: 'test_vXi16' +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = 
shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = 
shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> + %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> + %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> + %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> + %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> + %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> + %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> + %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> + %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> + %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> + + %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> + %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> + + %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> + + %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> 
%src64_256, <16 x i32> + %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> + %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> + + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: 
%V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi8' +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = 
shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = 
shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; 
AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = 
shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x 
i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> 
%src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> 
%src32_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VMBI-LABEL: 'test_vXi8' +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = 
shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> + %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> + %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> + %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> + %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> + %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> + %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> + %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> + %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> + %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> + %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> + %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> + %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> + %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> + %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> + + %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> + %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> + + %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> + + %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_23 = shufflevector <16 x 
i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> %src16_128, <16 x i32> + %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> %src32_128, <16 x i32> + %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> + %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> %src64_128, <16 x i32> + + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-load-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-load-codesize.ll new file mode 100644 index 0000000..ba66b0d --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-load-codesize.ll @@ -0,0 +1,473 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s -check-prefixes=SSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s
-check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s -check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s -check-prefixes=AVX512 + +; This test checks that the cost of a splat-load shuffle is correctly detected. +; If there is a combined load+broadcast instruction, like `movddup`, it should +; return 0. +; +; TODO: AVX `vbroadcast*` seems to support more types than the +; 2xdouble type of `movddup`: +; - `vbroadcastss` supports 4xfloat, 8xfloat +; - `vbroadcastsd` supports 4xdouble + +; NOTE: The code in this test is a hack. Since TTI cannot currently detect a +; proper broadcast pattern from a scalar load (like the one that follows), +; we use a vector load as the shuffle's operand to trigger the pattern. +; +; %load = load double, double *%ptr +; %insert = insertelement <2 x double> poison, double %load, i32 0 +; %bcast = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer + +define void @shuffle_load() { +; SSE-LABEL: 'shuffle_load' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32>
zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE2-LABEL: 'shuffle_load' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> 
%ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> 
%ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> 
%ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'shuffle_load' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr 
undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 
16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; SSE3-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, 
align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'shuffle_load' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%ld_4xf32 = load <4 x float>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'shuffle_load' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> 
%ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> 
zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> 
zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> 
zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> 
%ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'shuffle_load' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr 
undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf16 = load <4 
x half>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %ld_2xi8 = load <2 x i8>, ptr undef + %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer + %ld_4xi8 = load <4 x i8>, ptr undef + %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer + %ld_8xi8 = load <8 x i8>, ptr undef + %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer + %ld_16xi8 = load <16 x i8>, ptr undef + %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer + %ld_32xi8 = load <32 x i8>, ptr undef + %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer + %ld_64xi8 = load <64 x i8>, ptr undef + %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer 
+ + %ld_2xi16 = load <2 x i16>, ptr undef + %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer + %ld_4xi16 = load <4 x i16>, ptr undef + %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer + %ld_8xi16 = load <8 x i16>, ptr undef + %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer + %ld_16xi16 = load <16 x i16>, ptr undef + %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer + %ld_32xi16 = load <32 x i16>, ptr undef + %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer + + %ld_2xi32 = load <2 x i32>, ptr undef + %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer + %ld_4xi32 = load <4 x i32>, ptr undef + %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer + %ld_8xi32 = load <8 x i32>, ptr undef + %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer + %ld_16xi32 = load <16 x i32>, ptr undef + %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer + + %ld_2xi64 = load <2 x i64>, ptr undef + %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer + %ld_4xi64 = load <4 x i64>, ptr undef + %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer + %ld_8xi64 = load <8 x i64>, ptr undef + %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer + + %ld_2xf16 = load <2 x half>, ptr undef + %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer + %ld_4xf16 = load <4 x half>, ptr undef + %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer + %ld_8xf16 = load <8 x half>, ptr undef + %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x 
i32> zeroinitializer + %ld_16xf16 = load <16 x half>, ptr undef + %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer + %ld_32xf16 = load <32 x half>, ptr undef + %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer + + %ld_2xf32 = load <2 x float>, ptr undef + %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer + %ld_4xf32 = load <4 x float>, ptr undef + %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer + %ld_8xf32 = load <8 x float>, ptr undef + %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer + %ld_16xf32 = load <16 x float>, ptr undef + %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer + + %ld_2xf64 = load <2 x double>, ptr undef + %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer + %ld_4xf64 = load <4 x double>, ptr undef + %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer + %ld_8xf64 = load <8 x double>, ptr undef + %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer + + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-load-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-load-latency.ll new file mode 100644 index 0000000..8049194 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-load-latency.ll @@ -0,0 +1,473 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2 +; RUN: opt < %s
-mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=SSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX512 + +; This test checks that the cost of a splat-load shuffle is correctly detected. +; If there is a combined load+broadcast instruction, like `movddup`, it should +; return 0. +; +; TODO: AVX `vbroadcast*` seems to support more types than the +; 2xdouble type of `movddup`: +; - `vbroadcastss` supports 4xfloat, 8xfloat +; - `vbroadcastsd` supports 4xdouble + +; NOTE: The code in this test is a hack. Since TTI cannot currently detect a +; proper broadcast pattern from a scalar load (like the one that follows), +; we use a vector load as the shuffle's operand to trigger the pattern.
+; +; %load = load double, double *%ptr +; %insert = insertelement <2 x double> poison, double %load, i32 0 +; %bcast = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer + +define void @shuffle_load() { +; SSE-LABEL: 'shuffle_load' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, 
ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; 
SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr 
undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE2-LABEL: 'shuffle_load' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found 
an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'shuffle_load' +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = 
shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = 
shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = 
shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'shuffle_load' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, 
align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; AVX-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; AVX-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'shuffle_load' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%ld_2xf16 = load <2 x half>, ptr undef, align 4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'shuffle_load' +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = 
shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = 
shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x 
double>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %ld_2xi8 = load <2 x i8>, ptr undef + %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer + %ld_4xi8 = load <4 x i8>, ptr undef + %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer + %ld_8xi8 = load <8 x i8>, ptr undef + %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer + %ld_16xi8 = load <16 x i8>, ptr undef + %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer + %ld_32xi8 = load <32 x i8>, ptr undef + %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer + %ld_64xi8 = load <64 x i8>, ptr undef + %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer + + %ld_2xi16 = load <2 x i16>, ptr undef + %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer + %ld_4xi16 = load <4 x i16>, ptr undef + %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer + %ld_8xi16 = load <8 x i16>, ptr undef + %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer + %ld_16xi16 = load <16 x i16>, ptr undef + %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer + %ld_32xi16 = load <32 x i16>, ptr undef + %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer + + %ld_2xi32 = load <2 x i32>, ptr undef + %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer + %ld_4xi32 = load <4 x i32>, ptr undef + %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> 
zeroinitializer + %ld_8xi32 = load <8 x i32>, ptr undef + %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer + %ld_16xi32 = load <16 x i32>, ptr undef + %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer + + %ld_2xi64 = load <2 x i64>, ptr undef + %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer + %ld_4xi64 = load <4 x i64>, ptr undef + %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer + %ld_8xi64 = load <8 x i64>, ptr undef + %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer + + %ld_2xf16 = load <2 x half>, ptr undef + %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer + %ld_4xf16 = load <4 x half>, ptr undef + %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer + %ld_8xf16 = load <8 x half>, ptr undef + %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer + %ld_16xf16 = load <16 x half>, ptr undef + %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer + %ld_32xf16 = load <32 x half>, ptr undef + %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer + + %ld_2xf32 = load <2 x float>, ptr undef + %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer + %ld_4xf32 = load <4 x float>, ptr undef + %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer + %ld_8xf32 = load <8 x float>, ptr undef + %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer + %ld_16xf32 = load <16 x float>, ptr undef + %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer + + %ld_2xf64 = load <2 x double>, ptr undef + %sf_2xf64 = 
shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer + %ld_4xf64 = load <4 x double>, ptr undef + %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer + %ld_8xf64 = load <8 x double>, ptr undef + %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer + + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-load-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-load-sizelatency.ll new file mode 100644 index 0000000..bfb73f3 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-load-sizelatency.ll @@ -0,0 +1,473 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse3 | FileCheck %s -check-prefixes=SSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX512 + +; This test checks that the cost of a splat-load shuffle is correctly detected. +; If there is a combined load+broadcast instruction, like `movddup`, it should +; return 0. 
+; +; TODO: AVX `vbroadcast*` seems to support more types than the +; 2xdouble type of `movddup`: +; - `vbroadcastss` supports 4xfloat, 8xfloat +; - `vbroadcastsd` supports 4xdouble + +; NOTE: The code in this test is a hack. Since TTI cannot currently detect a +; proper broadcast pattern from a scalar load (like the one that follows), +; we use a vector load as the shuffle's operand to trigger the pattern. +; +; %load = load double, double *%ptr +; %insert = insertelement <2 x double> poison, double %load, i32 0 +; %bcast = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer + +define void @shuffle_load() { +; SSE-LABEL: 'shuffle_load' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x 
i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> 
undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> 
zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x 
i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE2-LABEL: 'shuffle_load' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'shuffle_load' +; SSE3-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf16 
= load <16 x half>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'shuffle_load' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, 
<32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer 
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'shuffle_load' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = 
shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> 
%ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> 
%ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x 
float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'shuffle_load' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%ld_2xi8 = load <2 x i8>, ptr undef, align 2 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load 
<4 x i16>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%ld_16xi32 = load <16 x i32>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> 
zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %ld_2xi8 = load <2 x i8>, ptr undef + %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer + %ld_4xi8 = load <4 x i8>, ptr undef + %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer + %ld_8xi8 = load <8 x i8>, ptr undef + %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer + %ld_16xi8 = load <16 x i8>, ptr undef + %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer + %ld_32xi8 = load <32 x i8>, ptr undef + %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer + %ld_64xi8 = load <64 x i8>, ptr undef + %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer + + %ld_2xi16 = load <2 x i16>, ptr undef + %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer + %ld_4xi16 = load <4 x i16>, ptr undef + %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer + %ld_8xi16 = load <8 x i16>, ptr undef + %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer + %ld_16xi16 = load <16 x i16>, ptr undef + %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> 
zeroinitializer + %ld_32xi16 = load <32 x i16>, ptr undef + %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer + + %ld_2xi32 = load <2 x i32>, ptr undef + %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer + %ld_4xi32 = load <4 x i32>, ptr undef + %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer + %ld_8xi32 = load <8 x i32>, ptr undef + %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer + %ld_16xi32 = load <16 x i32>, ptr undef + %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer + + %ld_2xi64 = load <2 x i64>, ptr undef + %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer + %ld_4xi64 = load <4 x i64>, ptr undef + %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer + %ld_8xi64 = load <8 x i64>, ptr undef + %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer + + %ld_2xf16 = load <2 x half>, ptr undef + %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer + %ld_4xf16 = load <4 x half>, ptr undef + %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer + %ld_8xf16 = load <8 x half>, ptr undef + %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer + %ld_16xf16 = load <16 x half>, ptr undef + %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer + %ld_32xf16 = load <32 x half>, ptr undef + %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer + + %ld_2xf32 = load <2 x float>, ptr undef + %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer + %ld_4xf32 = load <4 x float>, ptr undef + %sf_4xf32 = shufflevector <4 x 
float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer + %ld_8xf32 = load <8 x float>, ptr undef + %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer + %ld_16xf32 = load <16 x float>, ptr undef + %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer + + %ld_2xf64 = load <2 x double>, ptr undef + %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer + %ld_4xf64 = load <4 x double>, ptr undef + %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer + %ld_8xf64 = load <8 x double>, ptr undef + %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer + + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-codesize.ll new file mode 100644 index 0000000..ad7a9bc --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-codesize.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE +; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes="print" 2>&1 -disable-output | FileCheck %s -check-prefixes=AVX + +define void @test() { +; SSE-LABEL: 'test' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %matins.2.2 = 
shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> + ret void +} + +define <12 x i64> @foo(<12 x i64> noundef %src) { +; SSE-LABEL: 'foo' +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <12 x i64> %shuffle +; +; AVX-LABEL: 'foo' +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <12 x i64> %shuffle +; +entry: + %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> + ret <12 x i64> %shuffle +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-latency.ll new file mode 100644 index 0000000..2a9edb8 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-latency.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE +; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes="print" 2>&1 -disable-output | FileCheck %s -check-prefixes=AVX + +define void @test() { +; SSE-LABEL: 'test' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %matins.2.2 = shufflevector <9 x double> 
undef, <9 x double> undef, <9 x i32> + ret void +} + +define <12 x i64> @foo(<12 x i64> noundef %src) { +; SSE-LABEL: 'foo' +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <12 x i64> %shuffle +; +; AVX-LABEL: 'foo' +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <12 x i64> %shuffle +; +entry: + %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> + ret <12 x i64> %shuffle +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-sizelatency.ll new file mode 100644 index 0000000..bfaf594 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-non-pow-2-sizelatency.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE +; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes="print" 2>&1 -disable-output | FileCheck %s -check-prefixes=AVX + +define void @test() { +; SSE-LABEL: 'test' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %matins.2.2 = shufflevector <9 x double> undef, <9 x 
double> undef, <9 x i32> + ret void +} + +define <12 x i64> @foo(<12 x i64> noundef %src) { +; SSE-LABEL: 'foo' +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <12 x i64> %shuffle +; +; AVX-LABEL: 'foo' +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <12 x i64> %shuffle +; +entry: + %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> + ret <12 x i64> %shuffle +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-codesize.ll new file mode 100644 index 0000000..e65a590 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-codesize.ll @@ -0,0 +1,1255 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size 
-mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512dq,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512DQVEC256 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BWVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512BWVEC256 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512VBMIVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512VBMIVEC256 + +define void @replication_i1_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> 
zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found 
an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for 
instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf128 = 
shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 
x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride2' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, 
<16 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride2' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512DQVEC256-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride2' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride2' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride2' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf128 = 
shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride2' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x 
i1> poison, <128 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> + ret void +} + +define void @replication_i1_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 
= shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; 
SSE41-LABEL: 'replication_i1_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> 
poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride3' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride3' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> 
poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride3' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; 
AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride3' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost 
of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride3' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride3' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector 
<1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride3' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> 
poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride3' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> 
poison, <3 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> + ret void +} + +define void @replication_i1_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = 
shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x 
i1> undef, <32 x i1> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x 
i1> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an 
estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride4' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride4' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride4' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x 
i1> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride4' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride4' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; 
AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride4' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: 
Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> + ret void +} + +define void @replication_i1_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 804 for 
instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = 
shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %vf128 = 
shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride5' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %vf8 = shufflevector <8 x i1> undef, 
<8 x i1> poison, <40 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 334 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 668 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride5' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 332 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 664 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride5' +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an 
estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride5' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride5' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride5' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> 
undef, <8 x i1> poison, <40 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride5' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x 
i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride5' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride5' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; 
AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> + ret void +} + +define void @replication_i1_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE3-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated 
cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride6' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for 
instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride6' +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %vf32 = 
shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 398 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 796 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer 
+; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride6' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an 
estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride6' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride6' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: 
%vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride6' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i1> undef, 
<16 x i1> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride6' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 
'replication_i1_stride6' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> + ret void +} + +define void @replication_i1_stride7() nounwind 
"min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = 
shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> 
undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, 
<32 x i1> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride7' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride7' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x 
i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector 
<64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride7' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, 
<14 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride7' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512DQVEC256-NEXT: Cost 
Model: Found an estimated cost of 58 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride7' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride7' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride7' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 
= shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride7' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> + 
%vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> + ret void +} + +define void @replication_i1_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost 
Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1064 for 
instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 265 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 530 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1060 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost 
of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = 
shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride8' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride8' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x 
i1> poison, <32 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride8' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; 
AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride8' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride8' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: 
Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride8' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found 
an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-latency.ll new file mode 100644 index 0000000..62b41bd --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-latency.ll @@ -0,0 +1,1255 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print" 
-mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512dq,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512DQVEC256 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BWVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512BWVEC256 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512VBMIVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512VBMIVEC256 + +define void @replication_i1_stride2() nounwind 
"min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> 
undef, <16 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> 
zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost 
Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for 
instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost 
Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride2' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride2' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, 
<64 x i1> poison, <128 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride2' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride2' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; 
AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride2' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride2' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> 
poison, <16 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> + ret void +} + +define void @replication_i1_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; 
SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: 
Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 
25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride3' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret 
void +; +; AVX2-LABEL: 'replication_i1_stride3' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: 
%vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 
'replication_i1_stride3' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride3' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; 
AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride3' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride3' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride3' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride3' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf128 = 
shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> + ret void +} + +define void @replication_i1_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x 
i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf32 = 
shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = 
shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x 
i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride4' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride4' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: 
Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride4' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride4' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride4' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 
x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride4' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, 
<16 x i1> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> + ret void +} + +define void @replication_i1_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x 
i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSSE3-NEXT: 
Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an 
estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride5' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 
for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 334 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 668 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride5' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 332 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 664 for instruction: %vf128 = 
shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x 
i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride5' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512DQVEC512-NEXT: Cost Model: 
Found an estimated cost of 42 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride5' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride5' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride5' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i1> 
undef, <64 x i1> poison, <320 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride5' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride5' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 
x i1> poison, <10 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> + ret void +} + +define void @replication_i1_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> 
undef, <2 x i1> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 
x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; 
SSE41-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride6' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride6' +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost Model: Found an estimated 
cost of 100 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 398 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 796 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 
'replication_i1_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride6' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; 
AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride6' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512DQVEC256-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride6' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride6' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride6' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf128 = 
shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride6' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> + %vf64 = shufflevector <64 x i1> undef, 
<64 x i1> poison, <384 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> + ret void +} + +define void @replication_i1_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 117 for 
instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %vf16 = 
shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride7' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride7' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x 
i1> poison, <7 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 256 for instruction: 
%vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride7' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x 
i1> poison, <7 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride7' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; 
AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride7' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride7' +; AVX512BWVEC256-NEXT: Cost Model: 
Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride7' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride7' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> + ret void +} + +define void @replication_i1_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 
'replication_i1_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, 
<128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> 
zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 265 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 530 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1060 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an 
estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride8' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride8' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride8' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> 
undef, <32 x i1> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride8' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride8' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> 
zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride8' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; 
AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-sizelatency.ll new file mode 100644 index 0000000..fd785d0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1-sizelatency.ll @@ -0,0 +1,1255 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s 
--check-prefixes=SSSE3 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512DQVEC512 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512dq,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512DQVEC256 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BWVEC512 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512BWVEC256 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512VBMIVEC512 +; RUN: opt <
%s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512VBMIVEC256 + +define void @replication_i1_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> 
poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 33 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = 
shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 
96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride2' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> 
undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride2' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; 
AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride2' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride2' +; AVX512BWVEC256-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride2' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride2' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <2 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> + ret void +} + +define void @replication_i1_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride3' +; SSE3-NEXT: Cost 
Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 218 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for 
instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 392 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride3' +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %vf64 = shufflevector 
<64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride3' +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x 
i1> undef, <4 x i1> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; 
AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride3' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride3' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found 
an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride3' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = 
shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride3' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride3' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> 
undef, <2 x i1> poison, <6 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride3' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 
x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> + ret void +} + +define void @replication_i1_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found 
an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for 
instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 520 for instruction: 
%vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i1> 
undef, <8 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 
'replication_i1_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; 
AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride4' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512DQVEC512-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride4' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride4' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride4' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 
x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride4' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride4' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 
x i1> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> + ret void +} + +define void @replication_i1_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> 
poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> 
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE41-NEXT: Cost Model: 
Found an estimated cost of 81 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride5' +; AVX1-NEXT: Cost Model: Found an estimated cost 
of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 334 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 668 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride5' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %vf32 
= shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 332 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 664 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer 
+; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride5' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an 
estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride5' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride5' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
%vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride5' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i1> undef, 
<16 x i1> poison, <80 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride5' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 
'replication_i1_stride5' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> + ret void +} + +define void @replication_i1_stride6() nounwind 
"min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = 
shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 217 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 434 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 868 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1736 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> 
undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 
x i1> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride6' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride6' +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 398 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 796 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> 
undef, <64 x i1> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride6' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> 
+; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride6' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an 
estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride6' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride6' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride6' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector 
<32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride6' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> + %vf4 = shufflevector 
<4 x i1> undef, <4 x i1> poison, <24 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> + ret void +} + +define void @replication_i1_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated 
cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 932 for 
instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1864 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: 
%vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 113 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride7' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf128 = shufflevector 
<128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride7' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = 
shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 
x i1> poison, <896 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride7' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC256-LABEL: 'replication_i1_stride7' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; 
AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride7' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512BWVEC512-NEXT: Cost Model: 
Found an estimated cost of 58 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride7' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride7' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost 
of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride7' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> + ret void +} + +define void @replication_i1_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i1_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x 
i1> undef, <64 x i1> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i1_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i1_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> 
poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 996 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1992 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i1_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> 
poison, <1024 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i1_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 516 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'replication_i1_stride8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'replication_i1_stride8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 265 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 530 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1060 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i1_stride8' +; AVX512FVEC512-NEXT: 
Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i1_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 
144 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512DQVEC512-LABEL: 'replication_i1_stride8' +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512DQVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 
+; +; AVX512DQVEC256-LABEL: 'replication_i1_stride8' +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i1_stride8' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> 
poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i1_stride8' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; 
AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i1_stride8' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i1_stride8' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; 
AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer + %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> + %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> + %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> + %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> + %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> + %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> + %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-codesize.ll new file mode 100644 index 0000000..3db892e --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-codesize.ll @@ -0,0 +1,789 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt 
< %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BWVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512BWVEC256 + +define void @replication_i16_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; 
+; SSSE3-LABEL: 'replication_i16_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x 
i16> undef, <32 x i16> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> 
poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride2' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 
= shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride2' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> + ret void +} + +define void @replication_i16_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 
'replication_i16_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> 
poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x 
i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; 
AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; 
AVX512FVEC256-LABEL: 'replication_i16_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride3' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> 
poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride3' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> + 
%vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> + ret void +} + +define void @replication_i16_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE3-NEXT: 
Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE41-NEXT: Cost 
Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride4' +; AVX-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, 
<128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride4' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; 
AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride4' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer + 
%vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> + ret void +} + +define void @replication_i16_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; 
SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride5' +; SSE41-NEXT: Cost 
Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, 
<40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride5' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; 
AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride5' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512BWVEC256-NEXT: Cost 
Model: Found an estimated cost of 40 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> + ret void +} + +define void @replication_i16_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride6' 
+; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x 
i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> 
+; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 
x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride6' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride6' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated 
cost of 12 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> + ret void +} + +define void @replication_i16_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, 
<224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x 
i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride7' +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: 
Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride7' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride7' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> + ret void +} + +define void @replication_i16_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x 
i16> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> 
poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 
'replication_i16_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 306 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x 
i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 612 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost 
Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride8' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride8' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated 
cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-latency.ll new file mode 100644 index 0000000..76aa658 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-latency.ll @@ -0,0 +1,789 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BWVEC512 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512BWVEC256 + +define void @replication_i16_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride2' +; SSE2-NEXT: Cost Model: Found an
estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost 
of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for 
instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: 
%vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride2' +; AVX512FVEC256-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride2' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride2' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> + %vf64 = 
shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> + ret void +} + +define void @replication_i16_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector 
<16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> 
undef, <4 x i16> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> 
poison, <3 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for 
instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride3' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 
x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride3' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> + %vf4 = shufflevector <4 x i16> 
undef, <4 x i16> poison, <12 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> + ret void +} + +define void @replication_i16_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 
x i16> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x 
i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> 
poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> 
undef, <16 x i16> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride4' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 
x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride4' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; 
AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> + ret void +} + +define void @replication_i16_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, 
<5 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> 
poison, <320 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, 
<80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x 
i16> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride5' +; 
AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride5' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; 
AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> + ret void +} + +define void @replication_i16_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = 
shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector 
<16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector 
<4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> 
undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 
x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride6' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride6' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; 
AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> + ret void +} + +define void @replication_i16_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: 
%vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = 
shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = 
shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %vf64 = 
shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, 
<56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride7' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride7' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; 
AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> + ret void +} + +define void @replication_i16_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 
for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = 
shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 576 for 
instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 153 for instruction: 
%vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 306 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 612 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> 
poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride8' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; 
AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride8' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-sizelatency.ll new 
file mode 100644 index 0000000..fdc687b --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16-sizelatency.ll @@ -0,0 +1,789 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BWVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency 
-mattr=+avx512f,+avx512bw,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512BWVEC256 + +define void @replication_i16_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: 
%vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = 
shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> 
undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 
for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride2' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector 
<8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride2' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> + %vf4 = shufflevector <4 x i16> 
undef, <4 x i16> poison, <8 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> + ret void +} + +define void @replication_i16_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x 
i16> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> 
zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, 
<192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x 
i16> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride3' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; 
AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride3' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> + ret void +} + +define void @replication_i16_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; 
SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; SSE42-NEXT: 
Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride4' +; AVX512BWVEC512-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride4' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 
for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> + ret void +} + +define void @replication_i16_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; SSE42-NEXT: Cost 
Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an 
estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride5' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride5' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> + ret void +} + +define void @replication_i16_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x 
i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer 
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 
for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride6' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride6' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> + ret void +} + +define void @replication_i16_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x 
i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x 
i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX-NEXT: 
Cost Model: Found an estimated cost of 272 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride7' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; 
AVX512BWVEC256-LABEL: 'replication_i16_stride7' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> + ret void +} + +define void @replication_i16_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i16_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x 
i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i16_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, 
<512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i16_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i16_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x 
i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i16_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i16_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; 
AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 306 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 612 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i16_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i16_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = 
shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC512-LABEL: 'replication_i16_stride8' +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x 
i16> undef, <32 x i16> poison, <256 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512BWVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BWVEC256-LABEL: 'replication_i16_stride8' +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer + %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> + %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> + %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> + %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> + %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> + %vf64 = shufflevector <64 x i16> undef, <64 x i16> 
poison, <512 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-codesize.ll new file mode 100644 index 0000000..d844f7e --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-codesize.ll @@ -0,0 +1,521 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 + +define void @replication_i32_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride2' +; SSE2-NEXT:
Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for 
instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x 
i32> undef, <32 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> 
undef, <2 x i32> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> + ret void +} + +define void @replication_i32_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret 
void +; +; SSE3-LABEL: 'replication_i32_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x 
i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated 
cost of 144 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, 
<2 x i32> poison, <6 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> + ret void +} + +define void @replication_i32_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; 
SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 
for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> + ret void +} + +define void @replication_i32_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 408 
for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> 
undef, <2 x i32> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> + ret void +} + +define void @replication_i32_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 
for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = 
shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 
'replication_i32_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> + %vf16 = shufflevector <16 x 
i32> undef, <16 x i32> poison, <96 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> + ret void +} + +define void @replication_i32_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for 
instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = 
shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 
= shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> + ret void +} + +define void @replication_i32_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i32> undef, 
<4 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-latency.ll new file mode 100644 index 0000000..630618d --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-latency.ll @@ -0,0 +1,521 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=AVX 
+; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 + +define void @replication_i32_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x 
i32> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride2' +; SSE42-NEXT: Cost Model: Found 
an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> + ret void +} + +define void @replication_i32_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 
124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; 
AVX-LABEL: 'replication_i32_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = 
shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> + ret void +} + +define void @replication_i32_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: 
%vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x 
i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> + %vf8 = 
shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> + ret void +} + +define void @replication_i32_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 
'replication_i32_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> + ret void +} + +define void @replication_i32_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = 
shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x 
i32> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: 
Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, 
<48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> + ret void +} + +define void @replication_i32_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> 
poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; 
SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> + %vf32 = shufflevector <32 
x i32> undef, <32 x i32> poison, <224 x i32> + ret void +} + +define void @replication_i32_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> 
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found 
an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-sizelatency.ll new file mode 100644 index 0000000..23242a9 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32-sizelatency.ll @@ -0,0 +1,521 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse3 | 
FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 + +define void @replication_i32_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> 
poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated 
cost of 12 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf16 = shufflevector <16 x 
i32> undef, <16 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> + ret void +} + +define void @replication_i32_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 248 for 
instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 
x i32> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> 
+; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> + ret void +} + +define void @replication_i32_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = 
shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> 
poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride4' +; AVX-NEXT: Cost Model: Found 
an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; 
AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> + ret void +} + +define void @replication_i32_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 
x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE41-NEXT: Cost Model: 
Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> 
poison, <40 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> + ret void +} + +define void @replication_i32_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride6' +; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 
56 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 
for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> + ret void +} + +define void @replication_i32_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE2-NEXT: 
Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x 
i32> undef, <4 x i32> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 
14 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> + ret void +} + +define void @replication_i32_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i32_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 
126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = 
shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i32_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> 
poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i32_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> + ret void +} diff --git 
a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-codesize.ll new file mode 100644 index 0000000..298f8c9 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-codesize.ll @@ -0,0 +1,458 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 + +define void @replication_i64_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for 
instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 
'replication_i64_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> + ret void +} + +define void @replication_i64_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for 
instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 
'replication_i64_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> + ret void +} + +define void @replication_i64_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 
for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 
'replication_i64_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> + ret void +} + +define void @replication_i64_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 
for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 
'replication_i64_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> + ret void +} + +define void @replication_i64_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride6' +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; 
SSE41-LABEL: 'replication_i64_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> + ret void +} + +define void @replication_i64_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride7' +; SSE2-NEXT: Cost Model: Found 
an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret 
void +; +; SSE41-LABEL: 'replication_i64_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x 
i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> + ret void +} + +define void @replication_i64_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride8' +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %vf16 = shufflevector <16 x i64> 
undef, <16 x i64> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-latency.ll 
b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-latency.ll new file mode 100644 index 0000000..56a0dfb --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-latency.ll @@ -0,0 +1,458 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 + +define void @replication_i64_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +;
SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 
= shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride2' +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> + ret void +} + +define void @replication_i64_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector 
<2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride3' +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> + ret void +} + +define void @replication_i64_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = 
shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride4' +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> + ret void +} + +define void @replication_i64_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = 
shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride5' 
+; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> + ret void +} + +define void @replication_i64_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for 
instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 
'replication_i64_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> + ret void +} + +define void @replication_i64_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> 
poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride7' +; SSE41-NEXT: Cost Model: Found an 
estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; 
+; AVX512FVEC512-LABEL: 'replication_i64_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> + ret void +} + +define void @replication_i64_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x 
i64> undef, <2 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride8' +; SSE41-NEXT: 
Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-sizelatency.ll new file mode 100644 index 
0000000..f8ab614 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64-sizelatency.ll @@ -0,0 +1,458 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 + +define void @replication_i64_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for 
instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x 
i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> + ret void +} + +define void @replication_i64_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = 
shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; 
SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> + ret void +} + +define void @replication_i64_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = 
shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; 
SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> + ret void +} + +define void @replication_i64_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = 
shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; 
SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> + ret void +} + +define void @replication_i64_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: 
%vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x 
i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> + ret void +} + +define void @replication_i64_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for 
instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x 
i64> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride7' +; AVX512FVEC512-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> + ret void +} + +define void @replication_i64_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i64_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector 
<2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i64_stride8' +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i64_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-codesize.ll new file mode 100644 index 0000000..6c82ad5 --- /dev/null +++ 
b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-codesize.ll @@ -0,0 +1,789 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512VBMIVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi,+avx512vl,+prefer-256-bit | FileCheck %s 
--check-prefixes=AVX512VBMIVEC256 + +define void @replication_i8_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an 
estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = 
shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x 
i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 
'replication_i8_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride2' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride2' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> + %vf128 = 
shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> + ret void +} + +define void @replication_i8_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, 
<96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an 
estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = 
shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride3' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> 
poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride3' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> + %vf64 = shufflevector <64 
x i8> undef, <64 x i8> poison, <192 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> + ret void +} + +define void @replication_i8_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 310 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 310 for instruction: 
%vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 310 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x 
i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX-NEXT: Cost 
Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> 
+; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride4' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride4' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> + %vf32 = shufflevector <32 x i8> 
undef, <32 x i8> poison, <128 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> + ret void +} + +define void @replication_i8_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; 
SSE3-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an 
estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: 
%vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 42 for 
instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride5' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> 
poison, <80 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride5' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> + %vf16 = 
shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> + ret void +} + +define void @replication_i8_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 247 
for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector 
<4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 
'replication_i8_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i8> undef, 
<64 x i8> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride6' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride6' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> 
poison, <24 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> + ret void +} + +define void @replication_i8_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 
x i8> undef, <8 x i8> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 
x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512FVEC512-NEXT: 
Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride7' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride7' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = 
shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> + ret void +} + +define void @replication_i8_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 558 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, 
<32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 558 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 558 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride8' +; SSE41-NEXT: Cost 
Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 1152 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 297 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 594 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1188 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 
for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride8' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 
x i8> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride8' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x 
i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-latency.ll new file mode 100644 index 0000000..841846a --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-latency.ll @@ -0,0 +1,789 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output 
-cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512VBMIVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512VBMIVEC256 + +define void @replication_i8_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> 
poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride2' +; 
AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride2' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> + ret void +} + +define void @replication_i8_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = 
shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, 
<48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; 
AVX512VBMIVEC512-LABEL: 'replication_i8_stride3' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride3' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 
x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> + ret void +} + +define void @replication_i8_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 310 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x 
i8> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 310 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 310 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = 
shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x 
i8> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride4' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride4' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 
= shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> + ret void +} + +define void @replication_i8_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1848 for 
instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, 
<32 x i8> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE42-NEXT: 
Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf128 = shufflevector <128 x i8> undef, 
<128 x i8> poison, <640 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride5' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride5' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; 
AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> + ret void +} + +define void @replication_i8_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x 
i8> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: 
Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for 
instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x 
i8> undef, <2 x i8> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost 
Model: Found an estimated cost of 98 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride6' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride6' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> + ret void +} + +define void @replication_i8_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found 
an estimated cost of 1052 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 
= shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i8> undef, 
<4 x i8> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride7' +; 
AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 58 for 
instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride7' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride7' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 
x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> + ret void +} + +define void @replication_i8_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 558 for 
instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 558 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> 
undef, <8 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 558 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 
x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 297 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 594 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1188 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i8> 
undef, <32 x i8> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride8' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride8' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> 
+; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-sizelatency.ll new file mode 100644 index 0000000..232a84e --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8-sizelatency.ll @@ -0,0 +1,789 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output 
-cost-kind=size-latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512FVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512FVEC256 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512VBMIVEC512 +; RUN: opt < %s -passes="print" -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi,+avx512vl,+prefer-256-bit | FileCheck %s --check-prefixes=AVX512VBMIVEC256 + +define void @replication_i8_stride2() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x 
i8> undef, <4 x i8> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride2' +; SSSE3-NEXT: 
Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 
384 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x 
i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride2' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride2' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 
= shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride2' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 
'replication_i8_stride2' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> + ret void +} + +define void @replication_i8_stride3() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for 
instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; 
SSSE3-LABEL: 'replication_i8_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x 
i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 132 
for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride3' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride3' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512FVEC256-NEXT: 
Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride3' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride3' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> + ret void +} + +define void @replication_i8_stride4() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 310 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 310 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 310 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = 
shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x 
i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride4' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride4' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 
x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride4' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> 
+; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride4' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> + ret void +} + +define void @replication_i8_stride5() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for 
instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %vf128 = shufflevector <128 
x i8> undef, <128 x i8> poison, <640 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; 
SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 99 
for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride5' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride5' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512FVEC256-NEXT: 
Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride5' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride5' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> + ret void +} + +define void @replication_i8_stride6() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 
'replication_i8_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE3-NEXT: 
Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 224 
for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf8 = shufflevector <8 x i8> 
undef, <8 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride6' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride6' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride6' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 
x i8> poison, <384 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride6' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> + ret void +} + 
+define void @replication_i8_stride7() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1052 for 
instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x 
i8> undef, <16 x i8> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> 
+; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride7' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; 
AVX512FVEC256-LABEL: 'replication_i8_stride7' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride7' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; 
AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride7' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> + %vf64 = shufflevector <64 x i8> undef, 
<64 x i8> poison, <448 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> + ret void +} + +define void @replication_i8_stride8() nounwind "min-legal-vector-width"="256" { +; SSE2-LABEL: 'replication_i8_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 558 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 558 for instruction: %vf32 = 
shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 558 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x 
i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; 
AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 297 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 594 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1188 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC512-LABEL: 'replication_i8_stride8' +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x 
i8> poison, <1024 x i32> +; AVX512FVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512FVEC256-LABEL: 'replication_i8_stride8' +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC512-LABEL: 'replication_i8_stride8' +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512VBMIVEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMIVEC256-LABEL: 'replication_i8_stride8' +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512VBMIVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 
x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-codesize.ll new file mode 100644 index 0000000..1c75978 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-codesize.ll @@ -0,0 +1,346 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; RUN: opt < %s 
-mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 + +; +; Verify the cost model for reverse shuffles. +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> 
%src256, <4 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> + %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> 
undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> + %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> + %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x 
i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 
x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret 
void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 
= shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> + ret void +} + +; +; Tests the cost model for reverse shuffles of second operand. 
+; + +define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE-LABEL: 'test_upper_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_upper_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_upper_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> 
%b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_upper_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-codesize.ll new file mode 100644 index 0000000..baa7f41 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-codesize.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512fp16 | FileCheck %s + +define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128, <16 x half> %src256, <32 x half> %src512) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> + %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> + %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> + %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> + %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-latency.ll new file mode 100644 index 0000000..63d1af3 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-latency.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512fp16 | FileCheck %s + +define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128, <16 x half> %src256, <32 x half> %src512) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = 
shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> + %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> + %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> + %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> + %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-sizelatency.ll new file mode 100644 index 0000000..7de18b5 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16-sizelatency.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512fp16 | FileCheck %s + +define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128, <16 x half> %src256, <32 x half> %src512) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> + %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> + %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> + %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> + %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-reverse-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-latency.ll new file mode 100644 index 0000000..a0af548 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-latency.ll @@ -0,0 +1,346 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 +; RUN: 
opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 + +; +; Verify the cost model for reverse shuffles. 
+; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> + %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x 
i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> + %V128 = shufflevector <4 x float> %src128, <4 x 
float> undef, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> + %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> 
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x 
i16> %src32, <2 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, 
<32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> + ret void +} + +; +; Verify the cost model for reverse shuffles that select only from the second operand (mask indices 2N-1 down to N). +; + +define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE-LABEL: 'test_upper_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_upper_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float>
%a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_upper_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_upper_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 3, i32 2> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> + ret void +} diff --git
a/llvm/test/Analysis/CostModel/X86/shuffle-reverse-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-sizelatency.ll new file mode 100644 index 0000000..0da3bbd --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-reverse-sizelatency.ll @@ -0,0 +1,346 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output
-cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 + +; +; Verify the cost model for reverse shuffles. +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: Cost Model: Found an estimated
cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> <i32 1, i32 0> + %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +;
AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, 
<16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> + %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> + %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x 
i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 
4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> 
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> 
%src64, <8 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; 
AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> + ret void +} + +; +; Tests the cost model for reverse shuffles of second operand. 
+; + +define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE-LABEL: 'test_upper_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_upper_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_upper_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> 
%b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_upper_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-select-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-select-codesize.ll new file mode 100644 index 0000000..e66bce0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-select-codesize.ll @@ -0,0 +1,350 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 
2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop | FileCheck %s -check-prefixes=XOP +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for select shuffles +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> 
%src128, <2 x double> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 
+; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> + %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi64' +; XOP-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> + %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <2 x float> %src64_1, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) { +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> + %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x 
i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> 
%src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> + %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <2 x i16> %src32_1, 
<4 x i16> %src64_1, <64 x i16> %src1024, <8 x i16> %src128_1, <16 x i16> %src256_1, <32 x i16> %src512_1, <64 x i16> %src1024_1) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> 
%src1024_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; 
XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512, <2 x i8> %src16_1, <4 x i8> %src32_1, <8 x i8> %src64_1, <16 x i8> %src128_1, <32 x i8> %src256_1, <64 x i8> %src512_1) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> 
%src32_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> 
%src64_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-select-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-select-latency.ll new file mode 100644 index 0000000..77f0072 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-select-latency.ll @@ -0,0 +1,350 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop | FileCheck %s -check-prefixes=XOP +; RUN: 
opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for select shuffles +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = 
shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = 
shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> + %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> 
+; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> + %V1024 
= shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <2 x float> %src64_1, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) { +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> + %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> 
%src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> 
%src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> + %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <2 x i16> %src32_1, <4 x i16> %src64_1, <64 x i16> %src1024, <8 x i16> %src128_1, <16 x i16> %src256_1, <32 x i16> %src512_1, <64 x i16> %src1024_1) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 
x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x 
i16> %src32_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512, <2 x i8> %src16_1, <4 x i8> %src32_1, <8 x i8> %src64_1, <16 x i8> %src128_1, <32 x i8> %src256_1, <64 x i8> %src512_1) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> 
%src128, <16 x i8> %src128_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector 
<32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-select-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-select-sizelatency.ll new file mode 100644 index 0000000..d01e011 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-select-sizelatency.ll @@ -0,0 +1,350 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop | FileCheck %s -check-prefixes=XOP +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 
-disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for select shuffles +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x 
double> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x 
double> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> + %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> + %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> 
%src512, <32 x float> %src1024, <2 x float> %src64_1, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) { +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector 
<2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = 
shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> + %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { +; SSE2-LABEL: 'test_vXi32' +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE42-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> + %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <2 x i16> %src32_1, <4 x i16> %src64_1, <64 x i16> %src1024, <8 x i16> %src128_1, <16 x i16> %src256_1, <32 x i16> %src512_1, <64 x i16> %src1024_1) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> 
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE42-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = 
shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512, <2 x i8> %src16_1, <4 x i8> %src32_1, <8 x i8> %src64_1, <16 x i8> %src128_1, <32 x i8> %src256_1, <64 x i8> %src512_1) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = 
shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 
x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
%V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> 
%src512_1, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-codesize.ll new file mode 100644 index 0000000..65313e6 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-codesize.ll @@ -0,0 +1,389 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=CHECK,SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop | FileCheck %s -check-prefixes=CHECK,XOP +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 
-disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX1 + +; +; Verify the cost model for 1 src shuffles +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x 
double> %src128, <2 x double> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 
'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> + %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> + ret void +} + 
+define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> + %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024) { 
+; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> 
undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> + %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> 
%src128, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, 
<16 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> 
%src16, <2 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = 
shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = 
shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> + ret void +} + +; +; Test some Identity Shuffles +; + +define void @identity_vXf32(<4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; CHECK-LABEL: 'identity_vXf32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} diff --git 
a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-codesize.ll new file mode 100644 index 0000000..23f198a --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-codesize.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512fp16 | FileCheck %s + +define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %src512, <64 x half> %src1024) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> + %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> + %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> + %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-latency.ll new file mode 100644 index 0000000..7aba0f2 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-latency.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512fp16 | FileCheck %s + +define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %src512, <64 x half> %src1024) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> + %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> + %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> + %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-sizelatency.ll new file mode 100644 index 0000000..4e1d70c --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-fp16-sizelatency.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512fp16 | FileCheck %s + +define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %src512, <64 x half> %src1024) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> + %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> + %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> + %V1024 = shufflevector <64 x half> %src1024, <64 x half> undef, <64 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll new file mode 100644 index 0000000..48507c0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll @@ -0,0 +1,389 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=CHECK,SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop | FileCheck %s -check-prefixes=CHECK,XOP 
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX1 + +; +; Verify the cost model for 1 src shuffles +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x 
double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> + %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret 
void +; +; XOP-LABEL: 'test_vXi64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = 
shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x 
float> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x 
i32> + %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; 
AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> + %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 
x i16> %src32, <2 x i16> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 
= shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x 
i16> undef, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, 
<64 x i8> undef, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; 
AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> + ret void +} + +; +; Test some Identity Shuffles +; + +define void @identity_vXf32(<4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; CHECK-LABEL: 'identity_vXf32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, 
<16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-sizelatency.ll new file mode 100644 index 0000000..2dd6bb3 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-sizelatency.ll @@ -0,0 +1,389 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=CHECK,SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop | FileCheck %s -check-prefixes=CHECK,XOP +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output 
-cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX1 + +; +; Verify the cost model for 1 src shuffles +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, 
<4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> +; AVX512-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> + %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi64' 
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' 
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> + %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; SSE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi32' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> 
undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> + %V128 = shufflevector <4 x i32> 
%src128, <4 x i32> undef, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found 
an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> 
%src512, <32 x i16> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 10 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> undef, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> undef, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = 
shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x 
i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> undef, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> undef, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> undef, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> + ret void +} + +; +; Test some Identity Shuffles +; + +define void @identity_vXf32(<4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; CHECK-LABEL: 'identity_vXf32' +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splice-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splice-codesize.ll new file mode 100644 index 0000000..f67d681 --- /dev/null +++ 
b/llvm/test/Analysis/CostModel/X86/shuffle-splice-codesize.ll @@ -0,0 +1,323 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s 
--check-prefixes=AVX,AVX1 + +; +; Verify the cost model for splice shuffles. +; + +define void @test_vXf64(<2 x double> %a128, <2 x double> %b128, <4 x double> %a256, <4 x double> %b256, <8 x double> %a512, <8 x double> %b512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> + %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, 
<4 x i32> + %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %a128, <2 x i64> %b128, <4 x i64> %a256, <4 x i64> %b256, <8 x i64> %a512, <8 x i64> %b512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> + %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> + %V512 = shufflevector <8 x i64> %a512, <8 x i64> 
%b512, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> 
%a256, <8 x float> %b256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i32> %b128, <8 x i32> %a256, <8 x i32> %b256, <16 x i32> %a512, <16 x i32> %b512) { +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> 
%a512, <16 x i32> %b512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> 
%a512, <16 x i32> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> + %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> + %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> + %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16> %b64, <8 x i16> %a128, <8 x i16> %b128, <16 x i16> %a256, <16 x i16> %b256, <32 x i16> %a512, <32 x i16> %b512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> 
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> 
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 
x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> + %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> + %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x 
i32> + %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> + %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b32, <8 x i8> %a64, <8 x i8> %b64, <16 x i8> %a128, <16 x i8> %b128, <32 x i8> %a256, <32 x i8> %b256, <64 x i8> %a512, <64 x i8> %b512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x 
i8> %b256, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <64 x i8> %a512, 
<64 x i8> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; 
AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> + 
%V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> + %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> + %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> + %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> + %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> + ret void +} + diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splice-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splice-latency.ll new file mode 100644 index 0000000..8b02b82 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splice-latency.ll @@ -0,0 +1,323 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency 
-mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for splice shuffles. +; + +define void @test_vXf64(<2 x double> %a128, <2 x double> %b128, <4 x double> %a256, <4 x double> %b256, <8 x double> %a512, <8 x double> %b512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> 
%a128, <2 x double> %b128, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> + %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> + %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %a128, <2 x i64> %b128, <4 x i64> %a256, <4 x i64> %b256, <8 x i64> %a512, <8 x i64> %b512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> + %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> + %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x 
float> %a512, <16 x float> %b512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 
4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i32> %b128, <8 x i32> %a256, <8 x i32> %b256, <16 x i32> %a512, <16 x i32> %b512) { +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; 
+; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret 
void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> + %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> + %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> + %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16> %b64, <8 x i16> %a128, <8 x i16> %b128, <16 x i16> %a256, <16 x i16> %b256, <32 x i16> %a512, <32 x i16> %b512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, 
<16 x i16> %b256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> 
%a32, <2 x i16> %b32, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512VBMI-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> + %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> + %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> + %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> + %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b32, <8 x i8> %a64, <8 x i8> %b64, <16 x i8> %a128, <16 x i8> %b128, <32 x i8> %a256, <32 x i8> %b256, <64 x i8> %a512, <64 x i8> %b512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> 
%a16, <2 x i8> %b16, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x 
i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x 
i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> + %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> + %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> + %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> + %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> + %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> + ret void +} + diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splice-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splice-sizelatency.ll new file mode 100644 index 0000000..65558da --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splice-sizelatency.ll @@ -0,0 +1,323 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 
2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for splice shuffles. 
+; + +define void @test_vXf64(<2 x double> %a128, <2 x double> %b128, <4 x double> %a256, <4 x double> %b256, <8 x double> %a512, <8 x double> %b512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> + %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> + %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 
x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %a128, <2 x i64> %b128, <4 x i64> %a256, <4 x i64> %b256, <8 x i64> %a512, <8 x i64> %b512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> + %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> + %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %a64, <2 x 
float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x 
i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i32> %b128, <8 x i32> %a256, <8 x i32> %b256, <16 x i32> %a512, <16 x i32> %b512) { +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> + %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> + %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> + %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16> %b64, <8 x i16> %a128, <8 x i16> %b128, <16 x i16> %a256, <16 x i16> %b256, <32 x i16> %a512, <32 x i16> %b512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 
+; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = 
shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512F-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> + %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> + %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> + %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> + 
%V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b32, <8 x i8> %a64, <8 x i8> %b64, <16 x i8> %a128, <16 x i8> %b128, <32 x i8> %a256, <32 x i8> %b256, <64 x i8> %a512, <64 x i8> %b512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 
for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> + %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> + %V64 = shufflevector <8 x 
i8> %a64, <8 x i8> %b64, <8 x i32> + %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> + %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> + %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> + ret void +} + diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose-codesize.ll new file mode 100644 index 0000000..53da745 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-transpose-codesize.ll @@ -0,0 +1,295 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; 
RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for transpose shuffles. +; + +define void @test_vXf64(<2 x double> %a128, <2 x double> %b128, <4 x double> %a256, <4 x double> %b256, <8 x double> %a512, <8 x double> %b512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> + %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> + %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %a128, <2 x i64> %b128, <4 x i64> %a256, <4 x i64> %b256, <8 x i64> %a512, <8 x i64> %b512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = 
shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> + %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> + %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i32> %b128, <8 x i32> %a256, <8 x i32> %b256, <16 x i32> %a512, <16 x i32> %b512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = 
shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = 
shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> + %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> + %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> + %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16> %b64, <8 x i16> %a128, <8 x i16> %b128, <16 x i16> %a256, <16 x i16> %b256, <32 x i16> %a512, <32 x i16> %b512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x 
i16> %b32, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = 
shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> + %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> + %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> + %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> + %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, 
<4 x i8> %a32, <4 x i8> %b32, <8 x i8> %a64, <8 x i8> %b64, <16 x i8> %a128, <16 x i8> %b128, <32 x i8> %a256, <32 x i8> %b256, <64 x i8> %a512, <64 x i8> %b512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> + %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> + %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> + %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> + %V256 = shufflevector 
<32 x i8> %a256, <32 x i8> %b256, <32 x i32> + %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> + ret void +} + diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose-latency.ll new file mode 100644 index 0000000..8dd0ae2 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-transpose-latency.ll @@ -0,0 +1,295 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s 
--check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for transpose shuffles. +; + +define void @test_vXf64(<2 x double> %a128, <2 x double> %b128, <4 x double> %a256, <4 x double> %b256, <8 x double> %a512, <8 x double> %b512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> + %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> + %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %a128, <2 x i64> %b128, <4 x i64> %a256, <4 x i64> %b256, <8 x i64> %a512, <8 x i64> %b512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = 
shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> + %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> + %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i32> %b128, <8 x i32> %a256, <8 x i32> %b256, <16 x i32> %a512, <16 x i32> %b512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x 
i32> %a128, <4 x i32> %b128, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x 
i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> + %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> + %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> + %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16> %b64, <8 x i16> %a128, <8 x i16> %b128, <16 x i16> %a256, <16 x i16> %b256, <32 x i16> %a512, <32 x i16> %b512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> 
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x 
i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> + %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> + %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> + %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> + %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b32, <8 x i8> %a64, <8 x i8> %b64, <16 x i8> %a128, <16 x i8> %b128, <32 x i8> %a256, <32 x i8> %b256, <64 x i8> %a512, <64 x i8> %b512) 
{ +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x 
i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x 
i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> + %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> + %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> + %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> + %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> + %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> + ret void +} + diff --git 
a/llvm/test/Analysis/CostModel/X86/shuffle-transpose-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose-sizelatency.ll new file mode 100644 index 0000000..0073a29 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-transpose-sizelatency.ll @@ -0,0 +1,295 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 
-disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for transpose shuffles. +; + +define void @test_vXf64(<2 x double> %a128, <2 x double> %b128, <4 x double> %a256, <4 x double> %b256, <8 x double> %a512, <8 x double> %b512) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> 
%b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> + %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> + %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %a128, <2 x i64> %b128, <4 x i64> %a256, <4 x i64> %b256, <8 x i64> %a512, <8 x i64> %b512) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX512-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> + %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> + %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX2-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> + %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> + %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> + %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i32> %b128, <8 x i32> %a256, <8 x i32> %b256, <16 x i32> %a512, <16 x i32> %b512) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated 
cost of 12 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> + %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> + %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> + %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %a32, <2 x i16> %b32, <4 x i16> %a64, <4 x i16> %b64, <8 x i16> %a128, <8 x i16> %b128, <16 x i16> %a256, <16 x i16> %b256, <32 x i16> %a512, <32 x i16> %b512) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> + %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> + %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> + %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> + %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b32, <8 x i8> %a64, <8 x i8> %b64, <16 x i8> %a128, <16 x i8> %b128, <32 x i8> %a256, <32 x i8> %b256, <64 x i8> %a512, <64 x i8> %b512) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE42-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX2-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x 
i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> + %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> + %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> + %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> + %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> + %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> + ret void +} + diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-codesize.ll 
b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-codesize.ll new file mode 100644 index 0000000..6b2029e --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-codesize.ll @@ -0,0 +1,370 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop | FileCheck %s -check-prefixes=XOP +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s 
-mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for 2 src shuffles +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> + %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) { +; 
SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> + %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <2 x float> %src64_1, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an 
estimated cost of 240 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf32' +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> + %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x 
float> %src1024, <32 x float> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi32' +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> + %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <2 x i16> %src32_1, <4 x i16> %src64_1, <64 x i16> %src1024, <8 x i16> %src128_1, <16 x i16> %src256_1, <32 x i16> %src512_1, <64 x i16> %src1024_1) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V1024 
= shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = 
shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V1024 = shufflevector <64 x i16> %src1024, 
<64 x i16> %src1024_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x 
i16> %src1024_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 
= shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512, <2 x i8> %src16_1, <4 x i8> %src32_1, <8 x i8> %src64_1, <16 x i8> %src128_1, <32 x i8> %src256_1, <64 x i8> %src512_1) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> 
%src16_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, 
<4 x i8> %src32_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; 
AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> + %V256 = shufflevector <32 x 
i8> %src256, <32 x i8> %src256_1, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-codesize.ll new file mode 100644 index 0000000..ae05855 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-codesize.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512fp16 | FileCheck %s + +define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %src512, <64 x half> %src1024, <8 x half> %src128_1, <16 x half> %src256_1, <32 x half> %src512_1, <64 x half> %src1024_1) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> %src256_1, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> %src512_1, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> + %V256 = shufflevector <16 x half> %src256, <16 x half> %src256_1, <16 x i32> + %V512 = shufflevector <32 x half> %src512, <32 x half> %src512_1, <32 x i32> + %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-latency.ll 
b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-latency.ll new file mode 100644 index 0000000..ca13511 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-latency.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512fp16 | FileCheck %s + +define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %src512, <64 x half> %src1024, <8 x half> %src128_1, <16 x half> %src256_1, <32 x half> %src512_1, <64 x half> %src1024_1) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> %src256_1, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> %src512_1, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> + %V256 = shufflevector <16 x half> %src256, <16 x half> %src256_1, <16 x i32> + %V512 = shufflevector <32 x half> %src512, <32 x half> %src512_1, <32 x i32> + %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-sizelatency.ll new file mode 100644 index 0000000..5312e8a --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-fp16-sizelatency.ll @@ -0,0 +1,17 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512fp16 | FileCheck %s + +define void @test_vXf16(<8 x half> %src128, <16 x half> %src256, <32 x half> %src512, <64 x half> %src1024, <8 x half> %src128_1, <16 x half> %src256_1, <32 x half> %src512_1, <64 x half> %src1024_1) { +; CHECK-LABEL: 'test_vXf16' +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x half> %src256, <16 x half> %src256_1, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x half> %src512, <32 x half> %src512_1, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <8 x half> %src128, <8 x half> %src128_1, <8 x i32> + %V256 = shufflevector <16 x half> %src256, <16 x half> %src256_1, <16 x i32> + %V512 = shufflevector <32 x half> %src512, <32 x half> %src512_1, <32 x i32> + %V1024 = shufflevector <64 x half> %src1024, <64 x half> %src1024_1, <64 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-latency.ll new file mode 100644 index 0000000..91b4ee9 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-latency.ll @@ -0,0 +1,370 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: 
opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+xop | FileCheck %s -check-prefixes=XOP +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for 2 src shuffles +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x 
double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> 
+; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> + %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an 
estimated cost of 120 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector 
<8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> + %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <2 x float> %src64_1, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf32' +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 
x float> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x 
float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> + %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> %src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi32' +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for 
instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> + %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024, <2 x i16> %src32_1, <4 x i16> %src64_1, <8 x i16> %src128_1, <16 x i16> %src256_1, <32 x i16> %src512_1, <64 x i16> %src1024_1) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +;
SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi16' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; XOP-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost 
of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> + %V512 = 
shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512, <2 x i8> %src16_1, <4 x i8> %src32_1, <8 x i8> %src64_1, <16 x i8> %src128_1, <32 x i8> %src256_1, <64 x i8> %src512_1) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSSE3-NEXT: Cost 
Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = 
shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512BW-NEXT: Cost 
Model: Found an estimated cost of 19 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-two-src-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-sizelatency.ll new file mode 100644 index 0000000..8f40881 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-two-src-sizelatency.ll @@ -0,0 +1,370 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop | FileCheck %s -check-prefixes=XOP +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI +; +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu 
-passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 + +; +; Verify the cost model for 2 src shuffles +; + +define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) { +; SSE-LABEL: 'test_vXf64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXf64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXf64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; 
AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> + %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> + %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> + %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) { +; SSE-LABEL: 'test_vXi64' +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; SSE-NEXT: Cost Model: Found 
an estimated cost of 6 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi64' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_vXi64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 
x i64> %src128, <2 x i64> %src128_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> + %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> + %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> + %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> + ret void +} + +define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <2 x float> %src64_1, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) { +; SSE-LABEL: 'test_vXf32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 
+; +; XOP-LABEL: 'test_vXf32' +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXf32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXf32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> 
%src128, <4 x float> %src128_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXf32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x float> %src64, <2 x float> %src64_1, <2 x i32> + %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> + %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> + %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <2 x i32> 
%src64_1, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) { +; SSE-LABEL: 'test_vXi32' +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi32' +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_vXi32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = 
shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V64 = shufflevector <2 x i32> %src64, <2 x i32> %src64_1, <2 x i32> + %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> + %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> + %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> + %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> + ret void +} + +define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <2 x i16> %src32_1, <4 x i16> %src64_1, <64 x i16> %src1024, <8 x i16> %src128_1, <16 x i16> %src256_1, <32 x i16> %src512_1, <64 x i16> %src1024_1) { +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: 
Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi16' +; XOP-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %src32, <2 x i16> %src32_1, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V32 = shufflevector <2 x i16> %src32, <2 
x i16> %src32_1, <2 x i32> + %V64 = shufflevector <4 x i16> %src64, <4 x i16> %src64_1, <4 x i32> + %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> + %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> + %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> + %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> + ret void +} + +define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512, <2 x i8> %src16_1, <4 x i8> %src32_1, <8 x i8> %src64_1, <16 x i8> %src128_1, <32 x i8> %src256_1, <64 x i8> %src512_1) { +; SSE2-LABEL: 'test_vXi8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; XOP-LABEL: 'test_vXi8' +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> 
%src64, <8 x i8> %src64_1, <8 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %V16 = shufflevector <2 x i8> %src16, <2 x i8> %src16_1, <2 x i32> + %V32 = shufflevector <4 x i8> %src32, <4 x i8> %src32_1, <4 x i32> + %V64 = shufflevector <8 x i8> %src64, <8 x i8> %src64_1, <8 x i32> + %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> + %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> + %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> + ret void +} -- 2.7.4