From f1d8b7c49e29e0983c241862a4a78c3657620c36 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 23 Oct 2018 16:27:14 +0000
Subject: [PATCH] [CostModel][X86] Add transpose shuffle cost tests

llvm-svn: 345045
---
NOTE(review): this copy of the patch had been flattened onto a few physical
lines, and every shufflevector had lost its constant mask operand (the vector
constant that follows the `<N x i32>` mask type), which leaves the IR
unparsable. The layout below is restored to one record per line: 164 added
lines, matching the hunk header. The masks on the IR instruction lines are
reconstructed as the even-lane transpose pattern <0, N, 2, N+2, ...>; that is
an assumption based on the stated purpose of the test and must be confirmed
against llvm-svn r345045. The CHECK lines are left exactly as found (they end
at the mask type); FileCheck matches substrings, so they should still match,
but regenerate them with utils/update_analyze_test_checks.py to be certain.
The blob id on the `index` line refers to the upstream content.

 .../Analysis/CostModel/X86/shuffle-transpose.ll | 164 +++++++++++++++++++++
 1 file changed, 164 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll

diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll
new file mode 100644
index 0000000..2a846bf
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+ssse3 | FileCheck %s -check-prefixes=CHECK,SSE,SSSE3
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+sse4.2 | FileCheck %s -check-prefixes=CHECK,SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx | FileCheck %s -check-prefixes=CHECK,AVX,AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx2 | FileCheck %s -check-prefixes=CHECK,AVX,AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VBMI
+;
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2
+
+;
+; Verify the cost model for transpose shuffles.
+;
+
+define void @test_vXf64(<2 x double> %a128, <2 x double> %b128, <4 x double> %a256, <4 x double> %b256, <8 x double> %a512, <8 x double> %b512) {
+; SSE-LABEL: 'test_vXf64'
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32>
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32>
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32>
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX-LABEL: 'test_vXf64'
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32>
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32>
+; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX512-LABEL: 'test_vXf64'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; BTVER2-LABEL: 'test_vXf64'
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> <i32 0, i32 2>
+  %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret void
+}
+
+define void @test_vXi64(<2 x i64> %a128, <2 x i64> %b128, <4 x i64> %a256, <4 x i64> %b256, <8 x i64> %a512, <8 x i64> %b512) {
+; CHECK-LABEL: 'test_vXi64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; BTVER2-LABEL: 'test_vXi64'
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> <i32 0, i32 2>
+  %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret void
+}
+
+define void @test_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) {
+; SSE-LABEL: 'test_vXf32'
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32>
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32>
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32>
+; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32>
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX-LABEL: 'test_vXf32'
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32>
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32>
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32>
+; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX512-LABEL: 'test_vXf32'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; BTVER2-LABEL: 'test_vXf32'
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> <i32 0, i32 2>
+  %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret void
+}
+
+define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i32> %b128, <8 x i32> %a256, <8 x i32> %b256, <16 x i32> %a512, <16 x i32> %b512) {
+; CHECK-LABEL: 'test_vXi32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; BTVER2-LABEL: 'test_vXi32'
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> <i32 0, i32 2>
+  %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret void
+}
+
+define void @test_vXi16(<8 x i16> %a128, <8 x i16> %b128, <16 x i16> %a256, <16 x i16> %b256, <32 x i16> %a512, <32 x i16> %b512) {
+; CHECK-LABEL: 'test_vXi16'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; BTVER2-LABEL: 'test_vXi16'
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
+  ret void
+}
+
+define void @test_vXi8(<16 x i8> %a128, <16 x i8> %b128, <32 x i8> %a256, <32 x i8> %b256, <64 x i8> %a512, <64 x i8> %b512) {
+; CHECK-LABEL: 'test_vXi8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; BTVER2-LABEL: 'test_vXi8'
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
+  %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> <i32 0, i32 64, i32 2, i32 66, i32 4, i32 68, i32 6, i32 70, i32 8, i32 72, i32 10, i32 74, i32 12, i32 76, i32 14, i32 78, i32 16, i32 80, i32 18, i32 82, i32 20, i32 84, i32 22, i32 86, i32 24, i32 88, i32 26, i32 90, i32 28, i32 92, i32 30, i32 94, i32 32, i32 96, i32 34, i32 98, i32 36, i32 100, i32 38, i32 102, i32 40, i32 104, i32 42, i32 106, i32 44, i32 108, i32 46, i32 110, i32 48, i32 112, i32 50, i32 114, i32 52, i32 116, i32 54, i32 118, i32 56, i32 120, i32 58, i32 122, i32 60, i32 124, i32 62, i32 126>
+  ret void
+}
+
-- 
2.7.4