From 74b67e53c638fae6542372681cc181f1f4e23f12 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 22 Feb 2023 23:42:21 +0000 Subject: [PATCH] [LSR] Fix incorrect check in 73cd3d4391ad47ae7 I missed that the test needed an icelake-server cpu to fail, and left a testing "false &&" in the if condition. Hopefully this is now the correct fix. --- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2 +- llvm/test/CodeGen/X86/lsr-addrecloops.ll | 60 +++++++++-------------- 2 files changed, 24 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 3887533..67c404a 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4990,7 +4990,7 @@ static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, const SCEV *Reg, MemAccessTy AccessType) { if (Best->getType() != Reg->getType() || - (false && isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) && + (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) && cast<SCEVAddRecExpr>(Best)->getLoop() != cast<SCEVAddRecExpr>(Reg)->getLoop())) return false; diff --git a/llvm/test/CodeGen/X86/lsr-addrecloops.ll b/llvm/test/CodeGen/X86/lsr-addrecloops.ll index 76e8d4c..b0f97e3 100644 --- a/llvm/test/CodeGen/X86/lsr-addrecloops.ll +++ b/llvm/test/CodeGen/X86/lsr-addrecloops.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s | FileCheck %s ; Check that the SCEVs produced from the multiple loops don't attempt to get @@ -8,49 +7,36 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define void @in4dob_(ptr nocapture writeonly %0, ptr nocapture readonly %1, ptr nocapture readonly %2, i64 %3, i1 %min.iters.check840) { +define void @in4dob_(ptr nocapture writeonly %0, ptr nocapture readonly %1, ptr nocapture readonly %2, i64 %3, i1 %min.iters.check840) "target-cpu"="icelake-server" { ; CHECK-LABEL: in4dob_: ; 
CHECK: .LBB0_6: # %vector.body807 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: leaq (%rdi,%r10), %rbx -; CHECK-NEXT: movups %xmm0, -80(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -96(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -79(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -95(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -78(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -94(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -77(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -93(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -76(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -92(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -75(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -91(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -74(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -90(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -73(%rbx,%r9,4) -; CHECK-NEXT: movups %xmm0, -89(%rbx,%r9,4) -; CHECK-NEXT: addq $8, %r10 -; CHECK-NEXT: cmpq %r10, %r11 +; CHECK-NEXT: leaq (%rdi,%r9), %r11 +; CHECK-NEXT: vmovups %ymm0, (%rax,%r11) +; CHECK-NEXT: vmovups %ymm0, 1(%rax,%r11) +; CHECK-NEXT: vmovups %ymm0, 2(%rax,%r11) +; CHECK-NEXT: vmovups %ymm0, 3(%rax,%r11) +; CHECK-NEXT: vmovups %ymm0, 4(%rax,%r11) +; CHECK-NEXT: vmovups %ymm0, 5(%rax,%r11) +; CHECK-NEXT: vmovups %ymm0, 6(%rax,%r11) +; CHECK-NEXT: vmovups %ymm0, 7(%rax,%r11) +; CHECK-NEXT: addq $8, %r9 +; CHECK-NEXT: cmpq %r9, %r10 ; CHECK-NEXT: jne .LBB0_6 ; CHECK: .LBB0_14: # %vector.body847 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: leaq (%rdi,%rcx), %r8 -; CHECK-NEXT: movups %xmm0, 16(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, (%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 17(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 1(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 18(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 2(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 19(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 3(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 20(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 4(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 21(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 5(%r8,%r9,4) 
-; CHECK-NEXT: movups %xmm0, 22(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 6(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 23(%r8,%r9,4) -; CHECK-NEXT: movups %xmm0, 7(%r8,%r9,4) +; CHECK-NEXT: vmovups %ymm0, 96(%rax,%r8) +; CHECK-NEXT: vmovups %ymm0, 97(%rax,%r8) +; CHECK-NEXT: vmovups %ymm0, 98(%rax,%r8) +; CHECK-NEXT: vmovups %ymm0, 99(%rax,%r8) +; CHECK-NEXT: vmovups %ymm0, 100(%rax,%r8) +; CHECK-NEXT: vmovups %ymm0, 101(%rax,%r8) +; CHECK-NEXT: vmovups %ymm0, 102(%rax,%r8) +; CHECK-NEXT: vmovups %ymm0, 103(%rax,%r8) +; CHECK-NEXT: addq $8, %rcx +; CHECK-NEXT: cmpq %rcx, %rdx +; CHECK-NEXT: jne .LBB0_14 .preheader263: %4 = shl i64 %3, 2 br label %5 -- 2.7.4