From 3c6fbad38866975379145dfa39ef0dac44209771 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Sat, 13 May 2017 05:39:46 +0000 Subject: [PATCH] InstCombine: Move tests that use target intrinsics into subdirectories Tests with target intrinsics are inherently target specific, so it doesn't actually make sense to run them if we've excluded their target. llvm-svn: 302979 --- .../{ => AArch64}/2012-04-23-Neon-Intrinsics.ll | 68 +------------ .../Transforms/InstCombine/AArch64/lit.local.cfg | 2 + .../InstCombine/{ => AMDGPU}/amdgcn-intrinsics.ll | 0 .../Transforms/InstCombine/AMDGPU/lit.local.cfg | 2 + .../InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll | 65 ++++++++++++ .../InstCombine/{ => ARM}/constant-fold-hang.ll | 0 llvm/test/Transforms/InstCombine/ARM/lit.local.cfg | 2 + .../InstCombine/{ => ARM}/neon-intrinsics.ll | 0 .../InstCombine/{ => PowerPC}/aligned-altivec.ll | 0 .../InstCombine/{ => PowerPC}/aligned-qpx.ll | 0 .../Transforms/InstCombine/PowerPC/lit.local.cfg | 3 + .../InstCombine/{ => PowerPC}/vsx-unaligned.ll | 0 .../InstCombine/{ => X86}/X86FsubCmpCombine.ll | 0 .../Transforms/InstCombine/{ => X86}/blend_x86.ll | 0 llvm/test/Transforms/InstCombine/X86/lit.local.cfg | 2 + .../Transforms/InstCombine/{ => X86}/pr2645-1.ll | 0 .../InstCombine/{ => X86}/shufflemask-undef.ll | 3 +- .../Transforms/InstCombine/{ => X86}/x86-avx2.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-avx512.ll | 0 .../InstCombine/{ => X86}/x86-crc32-demanded.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-f16c.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-fma.ll | 0 .../InstCombine/{ => X86}/x86-insertps.ll | 0 .../InstCombine/{ => X86}/x86-masked-memops.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-movmsk.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-muldq.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-pack.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-pshufb.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-sse.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-sse2.ll | 0 
.../Transforms/InstCombine/{ => X86}/x86-sse41.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-sse4a.ll | 0 .../InstCombine/X86/x86-vec_demanded_elts.ll | 110 +++++++++++++++++++++ .../InstCombine/{ => X86}/x86-vector-shifts.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-vperm2.ll | 0 .../InstCombine/{ => X86}/x86-vpermil.ll | 0 .../Transforms/InstCombine/{ => X86}/x86-xop.ll | 0 .../Transforms/InstCombine/vec_demanded_elts.ll | 108 -------------------- 38 files changed, 190 insertions(+), 175 deletions(-) rename llvm/test/Transforms/InstCombine/{ => AArch64}/2012-04-23-Neon-Intrinsics.ll (51%) create mode 100644 llvm/test/Transforms/InstCombine/AArch64/lit.local.cfg rename llvm/test/Transforms/InstCombine/{ => AMDGPU}/amdgcn-intrinsics.ll (100%) create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/lit.local.cfg create mode 100644 llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll rename llvm/test/Transforms/InstCombine/{ => ARM}/constant-fold-hang.ll (100%) create mode 100644 llvm/test/Transforms/InstCombine/ARM/lit.local.cfg rename llvm/test/Transforms/InstCombine/{ => ARM}/neon-intrinsics.ll (100%) rename llvm/test/Transforms/InstCombine/{ => PowerPC}/aligned-altivec.ll (100%) rename llvm/test/Transforms/InstCombine/{ => PowerPC}/aligned-qpx.ll (100%) create mode 100644 llvm/test/Transforms/InstCombine/PowerPC/lit.local.cfg rename llvm/test/Transforms/InstCombine/{ => PowerPC}/vsx-unaligned.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/X86FsubCmpCombine.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/blend_x86.ll (100%) create mode 100644 llvm/test/Transforms/InstCombine/X86/lit.local.cfg rename llvm/test/Transforms/InstCombine/{ => X86}/pr2645-1.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/shufflemask-undef.ll (99%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-avx2.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-avx512.ll (100%) rename llvm/test/Transforms/InstCombine/{ => 
X86}/x86-crc32-demanded.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-f16c.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-fma.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-insertps.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-masked-memops.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-movmsk.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-muldq.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-pack.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-pshufb.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-sse.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-sse2.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-sse41.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-sse4a.ll (100%) create mode 100644 llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll rename llvm/test/Transforms/InstCombine/{ => X86}/x86-vector-shifts.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-vperm2.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-vpermil.ll (100%) rename llvm/test/Transforms/InstCombine/{ => X86}/x86-xop.ll (100%) diff --git a/llvm/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll similarity index 51% rename from llvm/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll rename to llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll index 39408a2..04fb7d9 100644 --- a/llvm/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll @@ -1,70 +1,6 @@ ; RUN: opt -S -instcombine < %s | FileCheck %s - -define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp { -entry: - %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind - 
ret <4 x i32> %a -; CHECK: entry: -; CHECK-NEXT: ret <4 x i32> zeroinitializer -} - -define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp { -entry: - %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> ) nounwind - ret <4 x i32> %a -; CHECK: entry: -; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32> -; CHECK-NEXT: ret <4 x i32> %a -} - -define <4 x i32> @constantMul() nounwind readnone ssp { -entry: - %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> ) nounwind - ret <4 x i32> %a -; CHECK: entry: -; CHECK-NEXT: ret <4 x i32> -} - -define <4 x i32> @constantMulS() nounwind readnone ssp { -entry: - %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> ) nounwind - ret <4 x i32> %b -; CHECK: entry: -; CHECK-NEXT: ret <4 x i32> -} - -define <4 x i32> @constantMulU() nounwind readnone ssp { -entry: - %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> , <4 x i16> ) nounwind - ret <4 x i32> %b -; CHECK: entry: -; CHECK-NEXT: ret <4 x i32> -} - -define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp { -entry: - %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> %x) nounwind - %b = add <4 x i32> zeroinitializer, %a - ret <4 x i32> %b -; CHECK: entry: -; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> %x) [[NUW:#[0-9]+]] -; CHECK-NEXT: ret <4 x i32> %a -} - -define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp { -entry: - %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> ) nounwind - %b = add <4 x i32> %x, %a - ret <4 x i32> %b -; CHECK: entry: -; CHECK-NEXT: %b = add <4 x i32> %x, -; CHECK-NEXT: ret <4 x i32> %b -} - -declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone - -; ARM64 variants - +; ARM64 neon intrinsic variants - +; REQUIRES: aarch64 define <4 x 
i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp { entry: diff --git a/llvm/test/Transforms/InstCombine/AArch64/lit.local.cfg b/llvm/test/Transforms/InstCombine/AArch64/lit.local.cfg new file mode 100644 index 0000000..7184443 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/InstCombine/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/amdgcn-intrinsics.ll rename to llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/lit.local.cfg b/llvm/test/Transforms/InstCombine/AMDGPU/lit.local.cfg new file mode 100644 index 0000000..2a665f0 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll new file mode 100644 index 0000000..9efed36 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll @@ -0,0 +1,65 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp { +entry: + %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind + ret <4 x i32> %a +; CHECK: entry: +; CHECK-NEXT: ret <4 x i32> zeroinitializer +} + +define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp { +entry: + %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> ) nounwind + ret <4 x i32> %a +; CHECK: entry: +; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32> +; CHECK-NEXT: ret <4 x i32> %a +} + +define <4 x i32> @constantMul() nounwind readnone ssp { +entry: + %a = tail call <4 x 
i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> ) nounwind + ret <4 x i32> %a +; CHECK: entry: +; CHECK-NEXT: ret <4 x i32> +} + +define <4 x i32> @constantMulS() nounwind readnone ssp { +entry: + %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> ) nounwind + ret <4 x i32> %b +; CHECK: entry: +; CHECK-NEXT: ret <4 x i32> +} + +define <4 x i32> @constantMulU() nounwind readnone ssp { +entry: + %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> , <4 x i16> ) nounwind + ret <4 x i32> %b +; CHECK: entry: +; CHECK-NEXT: ret <4 x i32> +} + +define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp { +entry: + %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> %x) nounwind + %b = add <4 x i32> zeroinitializer, %a + ret <4 x i32> %b +; CHECK: entry: +; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> %x) [[NUW:#[0-9]+]] +; CHECK-NEXT: ret <4 x i32> %a +} + +define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp { +entry: + %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> ) nounwind + %b = add <4 x i32> %x, %a + ret <4 x i32> %b +; CHECK: entry: +; CHECK-NEXT: %b = add <4 x i32> %x, +; CHECK-NEXT: ret <4 x i32> %b +} + +declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone diff --git a/llvm/test/Transforms/InstCombine/constant-fold-hang.ll b/llvm/test/Transforms/InstCombine/ARM/constant-fold-hang.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/constant-fold-hang.ll rename to llvm/test/Transforms/InstCombine/ARM/constant-fold-hang.ll diff --git a/llvm/test/Transforms/InstCombine/ARM/lit.local.cfg b/llvm/test/Transforms/InstCombine/ARM/lit.local.cfg new file mode 100644 index 0000000..236e1d3 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/ARM/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'ARM' in 
config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/InstCombine/neon-intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/neon-intrinsics.ll rename to llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll diff --git a/llvm/test/Transforms/InstCombine/aligned-altivec.ll b/llvm/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/aligned-altivec.ll rename to llvm/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll diff --git a/llvm/test/Transforms/InstCombine/aligned-qpx.ll b/llvm/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/aligned-qpx.ll rename to llvm/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll diff --git a/llvm/test/Transforms/InstCombine/PowerPC/lit.local.cfg b/llvm/test/Transforms/InstCombine/PowerPC/lit.local.cfg new file mode 100644 index 0000000..5d33887 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/PowerPC/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'PowerPC' in config.root.targets: + config.unsupported = True + diff --git a/llvm/test/Transforms/InstCombine/vsx-unaligned.ll b/llvm/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/vsx-unaligned.ll rename to llvm/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll diff --git a/llvm/test/Transforms/InstCombine/X86FsubCmpCombine.ll b/llvm/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/X86FsubCmpCombine.ll rename to llvm/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll diff --git a/llvm/test/Transforms/InstCombine/blend_x86.ll b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/blend_x86.ll rename to 
llvm/test/Transforms/InstCombine/X86/blend_x86.ll diff --git a/llvm/test/Transforms/InstCombine/X86/lit.local.cfg b/llvm/test/Transforms/InstCombine/X86/lit.local.cfg new file mode 100644 index 0000000..c8625f4 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/X86/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'X86' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/InstCombine/pr2645-1.ll b/llvm/test/Transforms/InstCombine/X86/pr2645-1.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/pr2645-1.ll rename to llvm/test/Transforms/InstCombine/X86/pr2645-1.ll diff --git a/llvm/test/Transforms/InstCombine/shufflemask-undef.ll b/llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll similarity index 99% rename from llvm/test/Transforms/InstCombine/shufflemask-undef.ll rename to llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll index 10509a9..d95c42d 100644 --- a/llvm/test/Transforms/InstCombine/shufflemask-undef.ll +++ b/llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -instcombine -S | not grep "shufflevector.*i32 8" +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK-NOT: shufflevector{{.*}}i32 8 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9" diff --git a/llvm/test/Transforms/InstCombine/x86-avx2.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-avx2.ll rename to llvm/test/Transforms/InstCombine/X86/x86-avx2.ll diff --git a/llvm/test/Transforms/InstCombine/x86-avx512.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-avx512.ll rename to llvm/test/Transforms/InstCombine/X86/x86-avx512.ll diff --git a/llvm/test/Transforms/InstCombine/x86-crc32-demanded.ll
b/llvm/test/Transforms/InstCombine/X86/x86-crc32-demanded.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-crc32-demanded.ll rename to llvm/test/Transforms/InstCombine/X86/x86-crc32-demanded.ll diff --git a/llvm/test/Transforms/InstCombine/x86-f16c.ll b/llvm/test/Transforms/InstCombine/X86/x86-f16c.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-f16c.ll rename to llvm/test/Transforms/InstCombine/X86/x86-f16c.ll diff --git a/llvm/test/Transforms/InstCombine/x86-fma.ll b/llvm/test/Transforms/InstCombine/X86/x86-fma.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-fma.ll rename to llvm/test/Transforms/InstCombine/X86/x86-fma.ll diff --git a/llvm/test/Transforms/InstCombine/x86-insertps.ll b/llvm/test/Transforms/InstCombine/X86/x86-insertps.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-insertps.ll rename to llvm/test/Transforms/InstCombine/X86/x86-insertps.ll diff --git a/llvm/test/Transforms/InstCombine/x86-masked-memops.ll b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-masked-memops.ll rename to llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll diff --git a/llvm/test/Transforms/InstCombine/x86-movmsk.ll b/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-movmsk.ll rename to llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll diff --git a/llvm/test/Transforms/InstCombine/x86-muldq.ll b/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-muldq.ll rename to llvm/test/Transforms/InstCombine/X86/x86-muldq.ll diff --git a/llvm/test/Transforms/InstCombine/x86-pack.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-pack.ll rename to 
llvm/test/Transforms/InstCombine/X86/x86-pack.ll diff --git a/llvm/test/Transforms/InstCombine/x86-pshufb.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-pshufb.ll rename to llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll diff --git a/llvm/test/Transforms/InstCombine/x86-sse.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-sse.ll rename to llvm/test/Transforms/InstCombine/X86/x86-sse.ll diff --git a/llvm/test/Transforms/InstCombine/x86-sse2.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse2.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-sse2.ll rename to llvm/test/Transforms/InstCombine/X86/x86-sse2.ll diff --git a/llvm/test/Transforms/InstCombine/x86-sse41.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse41.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-sse41.ll rename to llvm/test/Transforms/InstCombine/X86/x86-sse41.ll diff --git a/llvm/test/Transforms/InstCombine/x86-sse4a.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-sse4a.ll rename to llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll new file mode 100644 index 0000000..5ad8e76 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll @@ -0,0 +1,110 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define i16 @test1(float %f) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[TMP281:%.*]] = fadd float %f, -1.000000e+00 +; CHECK-NEXT: [[TMP373:%.*]] = fmul float [[TMP281]], 5.000000e-01 +; CHECK-NEXT: [[TMP374:%.*]] = insertelement <4 x float> undef, float [[TMP373]], i32 0 +; CHECK-NEXT: [[TMP48:%.*]] = tail 
call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP374]], <4 x float> ) +; CHECK-NEXT: [[TMP59:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP48]], <4 x float> ) +; CHECK-NEXT: [[TMP_UPGRD_1:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[TMP59]]) +; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP_UPGRD_1]] to i16 +; CHECK-NEXT: ret i16 [[TMP69]] +; + %tmp = insertelement <4 x float> undef, float %f, i32 0 + %tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1 + %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2 + %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3 + %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) + %tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) + %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) + %tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer ) + %tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) + %tmp69 = trunc i32 %tmp.upgrd.1 to i16 + ret i16 %tmp69 +} + +define i64 @test3(float %f, double %d) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[V00]]) +; CHECK-NEXT: [[V10:%.*]] = insertelement <4 x float> undef, float %f, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> [[V10]]) +; CHECK-NEXT: [[V20:%.*]] = insertelement <4 x float> undef, float %f, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[V20]]) +; CHECK-NEXT: 
[[V30:%.*]] = insertelement <4 x float> undef, float %f, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> [[V30]]) +; CHECK-NEXT: [[V40:%.*]] = insertelement <2 x double> undef, double %d, i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[V40]]) +; CHECK-NEXT: [[V50:%.*]] = insertelement <2 x double> undef, double %d, i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> [[V50]]) +; CHECK-NEXT: [[V60:%.*]] = insertelement <2 x double> undef, double %d, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[V60]]) +; CHECK-NEXT: [[V70:%.*]] = insertelement <2 x double> undef, double %d, i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> [[V70]]) +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]] +; CHECK-NEXT: ret i64 [[TMP15]] +; + %v00 = insertelement <4 x float> undef, float %f, i32 0 + %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1 + %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2 + %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3 + %tmp0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %v03) + %v10 = insertelement <4 x float> undef, float %f, i32 0 + %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1 + %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2 + %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3 + %tmp1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %v13) + %v20 = 
insertelement <4 x float> undef, float %f, i32 0 + %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1 + %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2 + %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3 + %tmp2 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v23) + %v30 = insertelement <4 x float> undef, float %f, i32 0 + %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1 + %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2 + %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3 + %tmp3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %v33) + %v40 = insertelement <2 x double> undef, double %d, i32 0 + %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1 + %tmp4 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %v41) + %v50 = insertelement <2 x double> undef, double %d, i32 0 + %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1 + %tmp5 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %v51) + %v60 = insertelement <2 x double> undef, double %d, i32 0 + %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1 + %tmp6 = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %v61) + %v70 = insertelement <2 x double> undef, double %d, i32 0 + %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1 + %tmp7 = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %v71) + %tmp8 = add i32 %tmp0, %tmp2 + %tmp9 = add i32 %tmp4, %tmp6 + %tmp10 = add i32 %tmp8, %tmp9 + %tmp11 = sext i32 %tmp10 to i64 + %tmp12 = add i64 %tmp1, %tmp3 + %tmp13 = add i64 %tmp5, %tmp7 + %tmp14 = add i64 %tmp12, %tmp13 + %tmp15 = add i64 %tmp11, %tmp14 + ret i64 %tmp15 +} + +declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) +declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) +declare i32 
@llvm.x86.sse.cvtss2si(<4 x float>) +declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) +declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) +declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) +declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) +declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) +declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) +declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) diff --git a/llvm/test/Transforms/InstCombine/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-vector-shifts.ll rename to llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll diff --git a/llvm/test/Transforms/InstCombine/x86-vperm2.ll b/llvm/test/Transforms/InstCombine/X86/x86-vperm2.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-vperm2.ll rename to llvm/test/Transforms/InstCombine/X86/x86-vperm2.ll diff --git a/llvm/test/Transforms/InstCombine/x86-vpermil.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-vpermil.ll rename to llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll diff --git a/llvm/test/Transforms/InstCombine/x86-xop.ll b/llvm/test/Transforms/InstCombine/X86/x86-xop.ll similarity index 100% rename from llvm/test/Transforms/InstCombine/x86-xop.ll rename to llvm/test/Transforms/InstCombine/X86/x86-xop.ll diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll index 5f27634..00efbe0 100644 --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -2,30 +2,6 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -define i16 @test1(float %f) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: [[TMP281:%.*]] = fadd float %f, -1.000000e+00 -; CHECK-NEXT: [[TMP373:%.*]] = fmul float 
[[TMP281]], 5.000000e-01 -; CHECK-NEXT: [[TMP374:%.*]] = insertelement <4 x float> undef, float [[TMP373]], i32 0 -; CHECK-NEXT: [[TMP48:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP374]], <4 x float> ) -; CHECK-NEXT: [[TMP59:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP48]], <4 x float> ) -; CHECK-NEXT: [[TMP_UPGRD_1:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[TMP59]]) -; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP_UPGRD_1]] to i16 -; CHECK-NEXT: ret i16 [[TMP69]] -; - %tmp = insertelement <4 x float> undef, float %f, i32 0 - %tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1 - %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2 - %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3 - %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) - %tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) - %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) - %tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer ) - %tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) - %tmp69 = trunc i32 %tmp.upgrd.1 to i16 - ret i16 %tmp69 -} - define i32 @test2(float %f) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[TMP5:%.*]] = fmul float %f, %f @@ -42,77 +18,6 @@ define i32 @test2(float %f) { ret i32 %tmp21 } -define i64 @test3(float %f, double %d) { -; CHECK-LABEL: @test3( -; CHECK-NEXT: [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[V00]]) -; CHECK-NEXT: [[V10:%.*]] = 
insertelement <4 x float> undef, float %f, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> [[V10]]) -; CHECK-NEXT: [[V20:%.*]] = insertelement <4 x float> undef, float %f, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[V20]]) -; CHECK-NEXT: [[V30:%.*]] = insertelement <4 x float> undef, float %f, i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> [[V30]]) -; CHECK-NEXT: [[V40:%.*]] = insertelement <2 x double> undef, double %d, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[V40]]) -; CHECK-NEXT: [[V50:%.*]] = insertelement <2 x double> undef, double %d, i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> [[V50]]) -; CHECK-NEXT: [[V60:%.*]] = insertelement <2 x double> undef, double %d, i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[V60]]) -; CHECK-NEXT: [[V70:%.*]] = insertelement <2 x double> undef, double %d, i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> [[V70]]) -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]] -; CHECK-NEXT: ret i64 [[TMP15]] -; - %v00 = insertelement <4 x float> undef, float %f, i32 0 - %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1 - %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2 - %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3 - %tmp0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %v03) - %v10 = 
insertelement <4 x float> undef, float %f, i32 0 - %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1 - %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2 - %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3 - %tmp1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %v13) - %v20 = insertelement <4 x float> undef, float %f, i32 0 - %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1 - %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2 - %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3 - %tmp2 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v23) - %v30 = insertelement <4 x float> undef, float %f, i32 0 - %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1 - %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2 - %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3 - %tmp3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %v33) - %v40 = insertelement <2 x double> undef, double %d, i32 0 - %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1 - %tmp4 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %v41) - %v50 = insertelement <2 x double> undef, double %d, i32 0 - %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1 - %tmp5 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %v51) - %v60 = insertelement <2 x double> undef, double %d, i32 0 - %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1 - %tmp6 = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %v61) - %v70 = insertelement <2 x double> undef, double %d, i32 0 - %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1 - %tmp7 = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %v71) - %tmp8 = add i32 %tmp0, %tmp2 - %tmp9 = add i32 %tmp4, %tmp6 - %tmp10 = add i32 %tmp8, %tmp9 - %tmp11 = sext i32 %tmp10 to i64 - %tmp12 = add i64 %tmp1, %tmp3 - %tmp13 = add i64 %tmp5, %tmp7 - %tmp14 = add i64 %tmp12, %tmp13 - %tmp15 = add 
i64 %tmp11, %tmp14 - ret i64 %tmp15 -} - define void @get_image() nounwind { ; CHECK-LABEL: @get_image( ; CHECK-NEXT: entry: @@ -156,18 +61,6 @@ entry: } declare i32 @fgetc(i8*) -declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) -declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) -declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) -declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) -declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) -declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) -declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) -declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) -declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) -declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) -declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) -declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind { ; CHECK-LABEL: @dead_shuffle_elt( @@ -248,4 +141,3 @@ define <2 x i64> @PR24922(<2 x i64> %v) { %result = select <2 x i1> bitcast (<4 x i32> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer ret <2 x i64> %result } - -- 2.7.4