From 1d0d86ae40fbc7b6dfc2b39098c9fe4e47e3a710 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Fri, 14 Dec 2018 14:07:57 +0000 Subject: [PATCH] [RegAllocGreedy] IMPLICIT_DEF values shouldn't prefer registers It costs nothing to spill an IMPLICIT_DEF value (the only spill code that's generated is a KILL of the value), so when creating split constraints if the live-out value is IMPLICIT_DEF the exit constraint should be DontCare instead of PrefReg. Differential Revision: https://reviews.llvm.org/D55652 llvm-svn: 349151 --- llvm/lib/CodeGen/RegAllocGreedy.cpp | 5 +- .../CodeGen/AMDGPU/subreg-split-live-in-error.mir | 21 +++-- llvm/test/CodeGen/Thumb/ragreedy-implicit-def.ll | 99 ++++++++++++++++++++++ llvm/test/CodeGen/X86/pr38795.ll | 96 ++++++++++----------- 4 files changed, 166 insertions(+), 55 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb/ragreedy-implicit-def.ll diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 64da848..f244028 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -1183,7 +1183,10 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, BC.Number = BI.MBB->getNumber(); Intf.moveToBlock(BC.Number); BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; - BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.Exit = (BI.LiveOut && + !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef()) + ? SpillPlacement::PrefReg + : SpillPlacement::DontCare; BC.ChangesValue = BI.FirstDef.isValid(); if (!Intf.hasInterference()) diff --git a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir index 4458a04..26228df 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir @@ -102,6 +102,13 @@ body: | bb.7: successors: %bb.13(0x80000000) + + ; In reality we are checking that this code doesn't assert when splitting + ; and inserting a spill. Here we just check that the point where the error + ; occurs we see a correctly generated spill. + ; GCN-LABEL: bb.7: + ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec + undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec %15.sub1:vreg_128 = COPY %15.sub0 %15.sub2:vreg_128 = COPY %15.sub0 @@ -114,6 +121,10 @@ body: | bb.9: successors: %bb.12(0x80000000) + + ; GCN-LABEL: bb.9: + ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec + undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec %15.sub1:vreg_128 = COPY %15.sub0 %15.sub2:vreg_128 = COPY %15.sub0 @@ -121,6 +132,10 @@ body: | bb.10: successors: %bb.12(0x80000000) + + ; GCN-LABEL: bb.10: + ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec + undef %15.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec %15.sub1:vreg_128 = COPY %15.sub0 %15.sub2:vreg_128 = COPY %15.sub0 @@ -143,12 +158,6 @@ body: | bb.13: successors: %bb.15(0x40000000), %bb.14(0x40000000) - ; In reality we are checking that this code doesn't assert when splitting - ; and inserting a spill. Here we just check that the point where the error - ; occurs we see a correctly generated spill. - ; GCN-LABEL: bb.13: - ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec - %18:vgpr_32 = V_MAD_F32 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $exec %19:vgpr_32 = V_MAD_F32 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $exec %20:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sreg_128, 1040, 0 :: (dereferenceable invariant load 16) diff --git a/llvm/test/CodeGen/Thumb/ragreedy-implicit-def.ll b/llvm/test/CodeGen/Thumb/ragreedy-implicit-def.ll new file mode 100644 index 0000000..d16b25e --- /dev/null +++ b/llvm/test/CodeGen/Thumb/ragreedy-implicit-def.ll @@ -0,0 +1,99 @@ +; REQUIRES: asserts +; RUN: llc -mtriple=thumbv6m -regalloc=greedy -stats < %s 2>&1 | FileCheck %s + +; Undef incoming values to phis end up creating IMPLICIT_DEF values. If we don't +; prefer them to be in a register then we get fewer spilled live ranges (6 +; compared to 7). +; CHECK: 6 regalloc - Number of spilled live ranges + +declare i32 @otherfn(i32) +define void @fn(i32 %val, i32* %ptr) { +entry: + %gep1 = getelementptr i32, i32* %ptr, i32 0 + %gep2 = getelementptr i32, i32* %ptr, i32 1 + %gep3 = getelementptr i32, i32* %ptr, i32 2 + %gep4 = getelementptr i32, i32* %ptr, i32 3 + %gep5 = getelementptr i32, i32* %ptr, i32 4 + %gep6 = getelementptr i32, i32* %ptr, i32 5 + %gep7 = getelementptr i32, i32* %ptr, i32 6 + %gep8 = getelementptr i32, i32* %ptr, i32 7 + %cmp1 = icmp uge i32 %val, 3 + br i1 %cmp1, label %if, label %then + +if: + %val1 = load i32, i32* %gep1, align 4 + %val2 = load i32, i32* %gep2, align 4 + %val3 = load i32, i32* %gep3, align 4 + %val4 = load i32, i32* %gep4, align 4 + %val5 = load i32, i32* %gep5, align 4 + %val6 = load i32, i32* %gep6, align 4 + %val7 = load i32, i32* %gep7, align 4 + %val8 = load i32, i32* %gep8, align 4 + br label %then + +then: + %phi1a = phi i32 [ %val1, %if ], [ undef, %entry ] + %phi2a = phi i32 [ %val2, %if ], [ undef, %entry ] + %phi3a = phi i32 [ %val3, %if ], [ undef, %entry ] + %phi4a = phi i32 [ %val4, %if ], [ undef, %entry ] + %phi5a = phi i32 [ %val5, %if ], [ undef, %entry ] + %phi6a = phi i32 [ %val6, %if ], [ undef, %entry ] + %phi7a = phi i32 [ %val7, %if ], [ undef, %entry ] + %phi8a = phi i32 [ %val8, %if ], [ undef, %entry ] + %switchval = call i32 @otherfn(i32 %val) + switch i32 %switchval, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 5, label %case5 + i32 6, label %case6 + i32 7, label %case7 + i32 8, label %case8 + ] + +default: + br label %switchend + +case0: + br label %switchend + +case1: + br label %switchend + +case5: + br label %switchend + +case6: + br label %switchend + +case7: + br label %switchend + +case8: + br label %switchend + +switchend: + %phi1b = phi i32 [ 0, %default ], [ undef, %case0 ], [ undef, %case1 ], [ %phi1a, %case5 ], [ 1, %case6 ], [ 2, %case7 ], [ 1, %case8 ] + %phi2b = phi i32 [ 0, %default ], [ undef, %case0 ], [ undef, %case1 ], [ %phi2a, %case5 ], [ 2, %case6 ], [ 2, %case7 ], [ 1, %case8 ] + %phi3b = phi i32 [ 0, %default ], [ undef, %case0 ], [ undef, %case1 ], [ %phi3a, %case5 ], [ 3, %case6 ], [ 2, %case7 ], [ 1, %case8 ] + %phi4b = phi i32 [ 0, %default ], [ undef, %case0 ], [ undef, %case1 ], [ %phi4a, %case5 ], [ 4, %case6 ], [ 2, %case7 ], [ 1, %case8 ] + %phi5b = phi i32 [ 0, %default ], [ undef, %case0 ], [ undef, %case1 ], [ %phi5a, %case5 ], [ 5, %case6 ], [ 2, %case7 ], [ 1, %case8 ] + %phi6b = phi i32 [ 0, %default ], [ undef, %case0 ], [ undef, %case1 ], [ %phi6a, %case5 ], [ 6, %case6 ], [ 2, %case7 ], [ 1, %case8 ] + %phi7b = phi i32 [ 0, %default ], [ undef, %case0 ], [ undef, %case1 ], [ %phi7a, %case5 ], [ 7, %case6 ], [ 2, %case7 ], [ 1, %case8 ] + %phi8b = phi i32 [ 0, %default ], [ undef, %case0 ], [ undef, %case1 ], [ %phi8a, %case5 ], [ 8, %case6 ], [ 2, %case7 ], [ 1, %case8 ] + %cmp2 = icmp uge i32 %val, 4 + br i1 %cmp2, label %if2, label %end + +if2: + store i32 %phi1b, i32* %gep1, align 4 + store i32 %phi2b, i32* %gep2, align 4 + store i32 %phi3b, i32* %gep3, align 4 + store i32 %phi4b, i32* %gep4, align 4 + store i32 %phi5b, i32* %gep5, align 4 + store i32 %phi6b, i32* %gep6, align 4 + store i32 %phi7b, i32* %gep7, align 4 + store i32 %phi8b, i32* %gep8, align 4 + br label %end + +end: + ret void +} diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll index 6cb2a08..11534a9 100644 --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -24,22 +24,23 @@ define dso_local void @fn() { ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: .cfi_offset %ebp, -8 ; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: # implicit-def: $esi +; CHECK-NEXT: # implicit-def: $ecx ; CHECK-NEXT: # implicit-def: $edi -; CHECK-NEXT: # implicit-def: $ah ; CHECK-NEXT: # implicit-def: $al -; CHECK-NEXT: # implicit-def: $edx +; CHECK-NEXT: # kill: killed $al +; CHECK-NEXT: # implicit-def: $dl +; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_16: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; CHECK-NEXT: movb %cl, %al +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_20 Depth 2 -; CHECK-NEXT: cmpb $8, %al -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: cmpb $8, %dl +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: ja .LBB0_3 ; CHECK-NEXT: # %bb.2: # %for.cond ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 @@ -47,28 +48,27 @@ define dso_local void @fn() { ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.4: # %if.end ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %edx, %ebp -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl a -; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload -; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h -; CHECK-NEXT: cmpb $8, %al +; CHECK-NEXT: cmpb $8, %dl ; CHECK-NEXT: jg .LBB0_8 ; CHECK-NEXT: # %bb.5: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %eax, %esi ; CHECK-NEXT: movl $.str, (%esp) -; CHECK-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: calll printf -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; CHECK-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: movl %ebp, %edx +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: jne .LBB0_16 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: # %bb.6: # %for.cond35 @@ -91,19 +91,16 @@ define dso_local void @fn() { ; CHECK-NEXT: calll printf ; CHECK-NEXT: .LBB0_19: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $ch -; CHECK-NEXT: # implicit-def: $cl -; CHECK-NEXT: # implicit-def: $edx +; CHECK-NEXT: # implicit-def: $dl +; CHECK-NEXT: # implicit-def: $dh +; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_20 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %if.then ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $.str, (%esp) -; CHECK-NEXT: movl %edx, %esi -; CHECK-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: calll printf -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: jne .LBB0_11 @@ -111,25 +108,15 @@ define dso_local void @fn() { ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_8: # %if.end21 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $edx +; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: je .LBB0_13 +; CHECK-NEXT: jmp .LBB0_10 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_17 -; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $edi -; CHECK-NEXT: # implicit-def: $ch -; CHECK-NEXT: # implicit-def: $cl -; CHECK-NEXT: # kill: killed $cl -; CHECK-NEXT: # implicit-def: $edx -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_11 ; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: movb %dl, %dh +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_20: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 @@ -140,25 +127,38 @@ define dso_local void @fn() { ; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: # %bb.22: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %ch, %al +; CHECK-NEXT: # %bb.9: # %ae +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne .LBB0_10 ; CHECK-NEXT: .LBB0_13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: je .LBB0_16 ; CHECK-NEXT: # %bb.14: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: testl %ebp, %ebp ; CHECK-NEXT: jne .LBB0_16 ; CHECK-NEXT: # %bb.15: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %ebp, %ebp ; CHECK-NEXT: jmp .LBB0_16 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $eax +; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: je .LBB0_17 +; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $edi +; CHECK-NEXT: # implicit-def: $cl +; CHECK-NEXT: # kill: killed $cl +; CHECK-NEXT: # implicit-def: $dl +; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: jmp .LBB0_7 entry: br label %for.cond -- 2.7.4