From d51576a3f06b97ead304f8df54e08dfdffa607fa Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 17 Sep 2019 04:41:01 +0000
Subject: [PATCH] [X86] Add test case for missed opportunity to commute a VCMP
 instruction after unfolding one load in order to fold another load.

llvm-svn: 372066
---
Reviewer note (text in this position is not part of the commit message):
The <8 x float> constant operands of the fcmp and the select were missing from
this copy of the patch, leaving the IR unparsable. They are restored below.
  * fcmp operand   = splat 2.0 -- matches ymm0 in the CHECK lines, which is the
                                  register operand compared by vcmpltps.
  * select "false" = splat 3.0 -- matches ymm1, copied into ymm2 before the
                                  masked broadcast.
  * select "true"  = splat 4.0 -- NOT visible in the CHECK lines (it is loaded
                                  via {{.*}}(%rip)); TODO confirm against the
                                  upstream test at r372066.
What the CHECK lines record: the 2.0 constant is broadcast into a register
outside the loop, the loaded data goes through a separate vmovups, and only the
select's "true" constant is folded as a masked broadcast load.

 .../CodeGen/X86/avx512-broadcast-unfold.ll    | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
index fb097dda114c..e337fa3ec992 100644
--- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
+++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
@@ -4448,3 +4448,40 @@ bb1:                                              ; preds = %bb1, %bb
 bb10:                                             ; preds = %bb1
   ret void
 }
+
+define void @bcast_unfold_cmp_v8f32_refold(float* nocapture %0) {
+; CHECK-LABEL: bcast_unfold_cmp_v8f32_refold:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq $-4096, %rax # imm = 0xF000
+; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vmovups 4096(%rdi,%rax), %ymm2
+; CHECK-NEXT:    vcmpltps %ymm0, %ymm2, %k1
+; CHECK-NEXT:    vmovaps %ymm1, %ymm2
+; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2 {%k1}
+; CHECK-NEXT:    vmovups %ymm2, 4096(%rdi,%rax)
+; CHECK-NEXT:    addq $32, %rax
+; CHECK-NEXT:    jne .LBB126_1
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  br label %2
+
+2:                                                ; preds = %2, %1
+  %3 = phi i64 [ 0, %1 ], [ %10, %2 ]
+  %4 = getelementptr inbounds float, float* %0, i64 %3
+  %5 = bitcast float* %4 to <8 x float>*
+  %6 = load <8 x float>, <8 x float>* %5, align 4
+  %7 = fcmp olt <8 x float> %6, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  %8 = select <8 x i1> %7, <8 x float> <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>, <8 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+  %9 = bitcast float* %4 to <8 x float>*
+  store <8 x float> %8, <8 x float>* %9, align 4
+  %10 = add i64 %3, 8
+  %11 = icmp eq i64 %10, 1024
+  br i1 %11, label %12, label %2
+
+12:                                               ; preds = %2
+  ret void
+}
-- 
2.34.1