This patch enables the case where we do not completely eliminate offset.
Supposedly, in this case we reduce live range overlap, which should never be harmful;
but since there are doubts that this is always true, it goes in as a separate change.
Differential Revision: https://reviews.llvm.org/D96399
Reviewed By: reames
// In this case, we may reuse the IV increment instead of the IV Phi to
// achieve the following advantages:
// 1. If IV step matches the offset, we will have no need for the offset;
+ // 2. Even if they don't match, we will reduce the overlap of the live
+ // ranges of the IV and the IV increment, which will potentially lead
+ // to better register assignment.
if (AddrMode.BaseOffs) {
if (auto IVStep = GetConstantStep(ScaleReg)) {
Instruction *IVInc = IVStep->first;
APInt Step = IVStep->second;
APInt Offset = Step * AddrMode.Scale;
- if (Offset.isSignedIntN(64) && TestAddrMode.BaseOffs == Offset &&
- DT.dominates(IVInc, MemoryInst)) {
+ if (Offset.isSignedIntN(64) && DT.dominates(IVInc, MemoryInst)) {
TestAddrMode.InBounds = false;
TestAddrMode.ScaledReg = IVInc;
TestAddrMode.BaseOffs -= Offset.getLimitedValue();
define i32 @test_01a(i32* %p, i64 %len, i32 %x) {
; CHECK-LABEL: test_01a:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB1_1: ## %loop
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: subq $1, %rax
+; CHECK-NEXT: subq $1, %rsi
; CHECK-NEXT: jb LBB1_4
; CHECK-NEXT: ## %bb.2: ## %backedge
; CHECK-NEXT: ## in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: cmpl %edx, -28(%rdi,%rsi,4)
-; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: cmpl %edx, -24(%rdi,%rsi,4)
; CHECK-NEXT: jne LBB1_1
; CHECK-NEXT: ## %bb.3: ## %failure
; CHECK-NEXT: ud2
; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[IV]], [[LEN:%.*]]
; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
-; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV_NEXT]], 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to i8*
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[SUNKADDR]]
-; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -4
+; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -8
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
; CHECK-NEXT: br i1 [[OV]], label [[EXIT:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
-; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[MATH]], 4
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to i8*
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[SUNKADDR]]
-; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -28
+; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -24
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[TMP2]] unordered, align 4
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]