From a4276fb2947661dadbbf9955c6d530786a53d29e Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Tue, 3 Feb 2015 15:37:18 +0000
Subject: [PATCH] Improve test to actually check for a folded load.

This test was checking for lack of a "movaps" (an aligned load)
rather than a "movups" (an unaligned load). It also included a store
which complicated the checking.

Add specific CPU runs to prevent subtarget feature flag overrides
from inhibiting this optimization.

llvm-svn: 227972
---
 llvm/test/CodeGen/X86/fold-vex.ll | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/llvm/test/CodeGen/X86/fold-vex.ll b/llvm/test/CodeGen/X86/fold-vex.ll
index 6d1b646..a0c5e22 100644
--- a/llvm/test/CodeGen/X86/fold-vex.ll
+++ b/llvm/test/CodeGen/X86/fold-vex.ll
@@ -1,16 +1,20 @@
+; Use CPU parameters to ensure that a CPU-specific attribute is not overriding the AVX definition.
+
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+; No need to load unaligned operand from memory using an explicit instruction with AVX.
+; The operand should be folded into the AND instr.
 
-;CHECK: @test
-; No need to load from memory. The operand will be loaded as part of the AND instr.
-;CHECK-NOT: vmovaps
-;CHECK: vandps
-;CHECK: ret
+define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
+  %in0 = load <4 x i32>* %p0, align 2
+  %a = and <4 x i32> %in0, %in1
+  ret <4 x i32> %a
 
-define void @test1(<8 x i32>* %p0, <8 x i32> %in1) nounwind {
-entry:
-  %in0 = load <8 x i32>* %p0, align 2
-  %a = and <8 x i32> %in0, %in1
-  store <8 x i32> %a, <8 x i32>* undef
-  ret void
+; CHECK-LABEL: @test1
+; CHECK-NOT: vmovups
+; CHECK: vandps (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: ret
 }
-- 
2.7.4