From: Matt Arsenault Date: Fri, 20 Sep 2019 00:09:15 +0000 (+0000) Subject: MachineScheduler: Fix missing dependency with multiple subreg defs X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=dd74f4839b1291810f376e0a5739ddd0abff91be;p=platform%2Fupstream%2Fllvm.git MachineScheduler: Fix missing dependency with multiple subreg defs If an instruction had multiple subregister defs, and one of them was undef, this would improperly conclude all other lanes are killed. There could still be other defs of those read-undef lanes in other operands. This would improperly remove register uses from CurrentVRegUses, so the visitation of later operands would not find the necessary register dependency. This would also mean this would fail or not depending on how different subregister def operands were ordered. On an undef subregister def, scan the instruction for other subregister defs and avoid killing those. This possibly should be deferring removing anything from CurrentVRegUses until the entire instruction has been processed instead. llvm-svn: 372362 --- diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 92420e6..735af245 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -400,6 +400,18 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { // earlier instruction. KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask; + if (MO.getSubReg() != 0 && MO.isUndef()) { + // There may be other subregister defs on the same instruction of the same + // register in later operands. The lanes of other defs will now be live + // after this instruction, so these should not be treated as killed by the + // instruction even though they appear to be killed in this one operand. + for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &OtherMO = MI->getOperand(I); + if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg) + KillLaneMask &= ~getLaneMaskForMO(OtherMO); + } + } + // Clear undef flag, we'll re-add it later once we know which subregister // Def is first. MO.setIsUndef(false); diff --git a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir new file mode 100644 index 0000000..3a574977 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s + +# Deciding which lanes are killed needs to account for other defs in the +# instruction. +# +# addVRegDefDeps would encounter the %0.sub0 def and erase %0 from +# current vreg uses because it shared no lanes with %0.sub1 use on the +# nop. It then didn't see the lanemask when it reached the second +# subreg def, and failed to add the necessary dependency between the +# asm and S_NOP + +--- +name: no_live_subrange_at_use +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; CHECK-LABEL: name: no_live_subrange_at_use + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; CHECK: INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1, 851977, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1, 851978, def undef %0.sub0, 851978, def undef %0.sub1 + ; CHECK: S_NOP 0, implicit %0.sub1 + ; CHECK: $sgpr10 = S_MOV_B32 -1 + ; CHECK: S_BRANCH %bb.1 + bb.0: + undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + + bb.1: + %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load 4, addrspace 3) + INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) + INLINEASM &"", 1, 851977, %2 + INLINEASM &"", 1, 851978, def undef %0.sub0, 851978, def %0.sub1 + S_NOP 0, implicit %0.sub1 + $sgpr10 = S_MOV_B32 -1 + S_BRANCH %bb.1 + +... + +# Different operand order +--- +name: no_live_subrange_at_use_swap +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; CHECK-LABEL: name: no_live_subrange_at_use_swap + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; CHECK: INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1, 851977, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1, 851978, def undef %0.sub1, 851978, def undef %0.sub0 + ; CHECK: S_NOP 0, implicit %0.sub1 + ; CHECK: $sgpr10 = S_MOV_B32 -1 + ; CHECK: S_BRANCH %bb.1 + bb.0: + undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + + bb.1: + %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load 4, addrspace 3) + INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) + INLINEASM &"", 1, 851977, %2 + INLINEASM &"", 1, 851978, def %0.sub1, 851978, def undef %0.sub0 + S_NOP 0, implicit %0.sub1 + $sgpr10 = S_MOV_B32 -1 + S_BRANCH %bb.1 + +...