From c46aab01c002b7a04135b8b7f1f52d8c9ae23a58 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 17 Feb 2022 16:52:02 -0500 Subject: [PATCH] RegAllocGreedy: Fix last chance recolor assert in impossible case This example is not compilable without handling eviction of specific subregisters. Last chance recoloring was deciding it could try evicting an overlapping superregister, which doesn't help make any progress. The LiveIntervalUnion would then assert due to an overlapping / identical range when trying the new assignment. Unfortunately this is also producing a verifier error after the allocation fails. I've seen a number of these, and not sure if we should just start deleting the function on error rather than trying to figure out how to put together valid MIR. I'm not super confident this is the right place to fix this. I also have a number of failing testcases I need to fix by handling partial evictions of superregisters. --- llvm/lib/CodeGen/RegAllocGreedy.cpp | 8 ++- ...lloc-failure-overlapping-insert-assert.mir | 62 +++++++++++++++++++ .../regalloc-illegal-eviction-assert.ll | 26 ++++++++ 3 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir create mode 100644 llvm/test/CodeGen/AMDGPU/regalloc-illegal-eviction-assert.ll diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 9371712a23ce..817aeb6d4d9b 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -1961,8 +1961,14 @@ bool RAGreedy::mayRecolorAllInterferences( // it would not be recolorable as it is in the same state as VirtReg. // However, if VirtReg has tied defs and Intf doesn't, then // there is still a point in examining if it can be recolorable. + // + // Also, don't try to evict a register which is assigned to an overlapping + // super register. + // + // TODO: Can we evict an interfering subset of the subregisters? if (((ExtraInfo->getStage(*Intf) == RS_Done && - MRI->getRegClass(Intf->reg()) == CurRC) && + (MRI->getRegClass(Intf->reg()) == CurRC || + TRI->regsOverlap(VRM->getPhys(Intf->reg()), PhysReg))) && !(hasTiedDef(MRI, VirtReg.reg()) && !hasTiedDef(MRI, Intf->reg()))) || FixedRegisters.count(Intf->reg())) { diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir new file mode 100644 index 000000000000..34b1cf695cee --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir @@ -0,0 +1,62 @@ +# RUN: not llc -march=amdgcn -mcpu=gfx908 -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s +# RUN: not --crash llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s + +# FIXME: We should not produce a verifier error after erroring + +# ERR: error: inline assembly requires more registers than available +# VERIFIER: *** Bad machine code: Using an undefined physical register *** + +# This testcase cannot be compiled with the enforced register +# budget. Previously, tryLastChanceRecoloring would assert here. It +# was attempting to recolor a superregister with an overlapping +# subregister over the same range. + +--- | + define void @foo() #0 { + ret void + } + + attributes #0 = { "amdgpu-waves-per-eu"="8,8" } + +... +--- +name: foo +tracksRegLiveness: true +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vreg_512 } + - { id: 3, class: vreg_256 } + - { id: 4, class: vreg_128 } + - { id: 5, class: vreg_96 } + - { id: 6, class: vreg_96 } + - { id: 7, class: vreg_512 } + - { id: 8, class: vreg_256 } + - { id: 9, class: vreg_128 } + - { id: 10, class: vreg_96 } + - { id: 11, class: vreg_96 } + - { id: 12, class: sreg_64 } + - { id: 13, class: ccr_sgpr_64 } + - { id: 14, class: vgpr_32 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0 (%ir-block.0): + liveins: $sgpr30_sgpr31 + + INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $agpr0 + %14:vgpr_32 = COPY killed $agpr0 + INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 16842762 /* regdef:VReg_512 */, def %7, 14155786 /* regdef:VReg_256 */, def %8, 5308426 /* regdef:VReg_128 */, def %9, 3866634 /* regdef:VReg_96 */, def %10, 3866634 /* regdef:VReg_96 */, def %11 + INLINEASM &"; clobber", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 12 /* clobber */, implicit-def dead early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 16842761 /* reguse:VReg_512 */, %7 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 14155785 /* reguse:VReg_256 */, %8 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5308425 /* reguse:VReg_128 */, %9 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3866633 /* reguse:VReg_96 */, %10 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3866633 /* reguse:VReg_96 */, %11 + $agpr1 = COPY %14 + INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $agpr1 + S_SETPC_B64_return killed renamable $sgpr30_sgpr31 + +... diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-illegal-eviction-assert.ll b/llvm/test/CodeGen/AMDGPU/regalloc-illegal-eviction-assert.ll new file mode 100644 index 000000000000..0d80195ba09c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/regalloc-illegal-eviction-assert.ll @@ -0,0 +1,26 @@ +; RUN: not llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -o - %s 2>%t.err | FileCheck %s +; RUN: FileCheck -check-prefix=ERR %s < %t.err + +; ERR: error: inline assembly requires more registers than available +; ERR: error: inline assembly requires more registers than available + +%asm.output = type { <16 x i32>, <8 x i32>, <5 x i32>, <4 x i32>, <16 x i32> } + +; CHECK-LABEL: {{^}}illegal_eviction_assert: +; CHECK: ; def v[0:15] v[20:27] v[0:4] v[16:19] a[0:15] +; CHECK: ; clobber +; CHECK: ; use v[0:15] v[20:27] v[0:4] v[16:19] a[1:16] +define void @illegal_eviction_assert(<32 x i32> addrspace(1)* %arg) #0 { + ;%agpr0 = call i32 asm sideeffect "; def $0","=${a0}"() + %asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,={a[0:15]}"() + %vgpr0 = extractvalue %asm.output %asm, 0 + %vgpr1 = extractvalue %asm.output %asm, 1 + %vgpr2 = extractvalue %asm.output %asm, 2 + %vgpr3 = extractvalue %asm.output %asm, 3 + %agpr0 = extractvalue %asm.output %asm, 4 + call void asm sideeffect "; clobber", "~{v[0:31]}"() + call void asm sideeffect "; use $0 $1 $2 $3 $4","v,v,v,v,{a[1:16]}"(<16 x i32> %vgpr0, <8 x i32> %vgpr1, <5 x i32> %vgpr2, <4 x i32> %vgpr3, <16 x i32> %agpr0) + ret void +} + +attributes #0 = { "amdgpu-waves-per-eu"="8,8" } -- 2.34.1