From: Matt Arsenault Date: Mon, 21 Oct 2019 19:53:46 +0000 (+0000) Subject: AMDGPU: Erase redundant redefs of m0 in SIFoldOperands X-Git-Tag: llvmorg-11-init~6009 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8ebbf25cb1e9b2c95903917b2aa72363e5b20a42;p=platform%2Fupstream%2Fllvm.git AMDGPU: Erase redundant redefs of m0 in SIFoldOperands Only handle simple intra-block redefs of m0 to the same value. This avoids interference from redefs of m0 in SILoadStoreOptimizer. I was initially teaching that pass to ignore redefs of m0, but having them not exist beforehand is much simpler. This is in preparation for deleting the current special m0 handling in SIFixSGPRCopies to allow the register coalescer to handle the difficult cases. llvm-svn: 375449 --- diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index bdbcc65..4eac031 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1349,6 +1349,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock *MBB : depth_first(&MF)) { MachineBasicBlock::iterator I, Next; + + MachineOperand *CurrentKnownM0Val = nullptr; for (I = MBB->begin(); I != MBB->end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; @@ -1361,6 +1363,25 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || !tryFoldOMod(MI)) tryFoldClamp(MI); + + // Saw an unknown clobber of m0, so we no longer know what it is. + if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI)) + CurrentKnownM0Val = nullptr; + continue; + } + + // Specially track simple redefs of m0 to the same value in a block, so we + // can erase the later ones. 
+ if (MI.getOperand(0).getReg() == AMDGPU::M0) { + MachineOperand &NewM0Val = MI.getOperand(1); + if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) { + MI.eraseFromParent(); + continue; + } + + // We aren't tracking other physical registers + CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ? + nullptr : &NewM0Val; continue; } diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir new file mode 100644 index 0000000..10b49e6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir @@ -0,0 +1,366 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s + +--- | + define amdgpu_kernel void @redef_m0_same_copy() { ret void } + define amdgpu_kernel void @multi_redef_m0_same_copy() { ret void } + define amdgpu_kernel void @redef_m0_different_copy() { ret void } + define amdgpu_kernel void @redef_m0_mixed_copy0() { ret void } + define amdgpu_kernel void @redef_m0_mixed_copy1() { ret void } + define amdgpu_kernel void @redef_m0_same_mov_imm() { ret void } + define amdgpu_kernel void @redef_m0_different_inst0() { ret void } + define amdgpu_kernel void @redef_m0_different_inst1() { ret void } + define amdgpu_kernel void @redef_m0_mixed_read_m0() { ret void } + define amdgpu_kernel void @redef_m0_same_copy_call() { ret void } + define amdgpu_kernel void @redef_m0_same_copy_multi_block() { ret void } + define amdgpu_kernel void @redef_m0_copy_self() { ret void } + define amdgpu_kernel void @redef_m0_copy_physreg() { ret void } + + declare void @func() +... 
+ +--- +name: redef_m0_same_copy +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_same_copy + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: multi_redef_m0_same_copy +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: multi_redef_m0_same_copy + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... 
+ +--- +name: redef_m0_different_copy +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_different_copy + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_mixed_copy0 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_mixed_copy0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %1 + $m0 = COPY %2 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... 
+ +--- +name: redef_m0_mixed_copy1 +tracksRegLiveness: true + +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_mixed_copy1 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + $m0 = COPY %1 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_same_mov_imm +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_same_mov_imm + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = S_MOV_B32 -1 + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = S_MOV_B32 -1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = S_MOV_B32 -1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... 
+ +--- +name: redef_m0_different_inst0 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_different_inst0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = IMPLICIT_DEF + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = IMPLICIT_DEF + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_different_inst1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_different_inst1 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: S_NOP 0, implicit-def $m0 + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + S_NOP 0, implicit-def $m0 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... 
+ +--- +name: redef_m0_mixed_read_m0 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; GCN-LABEL: name: redef_m0_mixed_read_m0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY [[COPY2]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_2:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 128, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + %2:sgpr_32 = COPY $sgpr1 + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY %2 + %5:vgpr_32 = DS_READ_B32 %0, 128, 0, implicit $m0, implicit $exec :: (load 4) +... 
+ +--- +name: redef_m0_same_copy_call +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_same_copy_call + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_same_copy_multi_block +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GCN-LABEL: name: redef_m0_same_copy_multi_block + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: bb.1: + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + bb.0: + liveins: $vgpr0, $sgpr0 + + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + + bb.1: + $m0 = COPY %1 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... 
+ +--- +name: redef_m0_copy_self +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_copy_self + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY [[COPY1]] + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $m0 = COPY $m0 + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $m0 = COPY $m0 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +... + +--- +name: redef_m0_copy_physreg +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; GCN-LABEL: name: redef_m0_copy_physreg + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN: $m0 = COPY $sgpr0 + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: $sgpr0 = S_MOV_B32 0 + ; GCN: $m0 = COPY $sgpr0 + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY $sgpr0 + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + $sgpr0 = S_MOV_B32 0 + $m0 = COPY $sgpr0 + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + +...