From: Marek Olsak
Date: Fri, 25 Nov 2016 16:03:15 +0000 (+0000)
Subject: Revert "AMDGPU: Make m0 unallocatable"
X-Git-Tag: llvmorg-4.0.0-rc1~3665
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a45dae458d1d557860f51bd6ebea508bb8ef1a54;p=platform%2Fupstream%2Fllvm.git

Revert "AMDGPU: Make m0 unallocatable"

This reverts commit 124ad83dae04514f943902446520c859adee0e96.

llvm-svn: 287932
---

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index bf493c9..4d55a66 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -253,7 +253,7 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
   switch (NumVectorElts) {
   case 1:
-    return AMDGPU::SReg_32_XM0RegClassID;
+    return AMDGPU::SReg_32RegClassID;
   case 2:
     return AMDGPU::SReg_64RegClassID;
   case 4:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 04edc91..6deee51 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -59,7 +59,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
   addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
 
-  addRegisterClass(MVT::i32, &AMDGPU::SReg_32_XM0RegClass);
+  addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
   addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
 
   addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
@@ -79,8 +79,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
 
   if (Subtarget->has16BitInsts()) {
-    addRegisterClass(MVT::i16, &AMDGPU::SReg_32_XM0RegClass);
-    addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass);
+    addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
+    addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
   }
 
   computeRegisterProperties(STI.getRegisterInfo());
@@ -941,25 +941,25 @@ SDValue SITargetLowering::LowerFormalArguments(
   // Start adding system SGPRs.
   if (Info->hasWorkGroupIDX()) {
     unsigned Reg = Info->addWorkGroupIDX();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
     CCInfo.AllocateReg(Reg);
   }
 
   if (Info->hasWorkGroupIDY()) {
     unsigned Reg = Info->addWorkGroupIDY();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
     CCInfo.AllocateReg(Reg);
   }
 
   if (Info->hasWorkGroupIDZ()) {
     unsigned Reg = Info->addWorkGroupIDZ();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
     CCInfo.AllocateReg(Reg);
   }
 
   if (Info->hasWorkGroupInfo()) {
     unsigned Reg = Info->addWorkGroupInfo();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
     CCInfo.AllocateReg(Reg);
   }
 
@@ -2414,15 +2414,15 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                   SI::KernelInputOffsets::LOCAL_SIZE_Z);
   case Intrinsic::amdgcn_workgroup_id_x:
   case Intrinsic::r600_read_tgid_x:
-    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
       TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT);
   case Intrinsic::amdgcn_workgroup_id_y:
   case Intrinsic::r600_read_tgid_y:
-    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
       TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT);
   case Intrinsic::amdgcn_workgroup_id_z:
   case Intrinsic::r600_read_tgid_z:
-    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
       TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT);
   case Intrinsic::amdgcn_workitem_id_x:
   case Intrinsic::r600_read_tidig_x:
@@ -4182,7 +4182,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
     default:
      return std::make_pair(0U, nullptr);
     case 32:
-      return std::make_pair(0U, &AMDGPU::SReg_32_XM0RegClass);
+      return std::make_pair(0U, &AMDGPU::SReg_32RegClass);
     case 64:
       return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
     case 128:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 85eca55..e9fbde1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -364,8 +364,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
-  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
-      RC == &AMDGPU::SReg_32RegClass) {
+  if (RC == &AMDGPU::SReg_32RegClass) {
     if (SrcReg == AMDGPU::SCC) {
       BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
           .addImm(-1)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 0ba76c7..94c07ff 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -243,7 +243,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
 
   MachineRegisterInfo &MRI = MF->getRegInfo();
   unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     .addImm(Offset);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 5bdd8be..d1907d1 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -120,11 +120,6 @@ def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
   let isAllocatable = 0;
 }
 
-def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
-  let CopyCost = 1;
-  let isAllocatable = 0;
-}
-
 // TODO: Do we need to set DwarfRegAlias on register tuples?
 
 // SGPR 32-bit registers
@@ -264,9 +259,8 @@ def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
 
 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
-  (add SReg_32_XM0, M0_CLASS)> {
+  (add SReg_32_XM0, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)> {
   let AllocationPriority = 1;
-  let isAllocatable = 0;
 }
 
 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)> {
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 76b0b45..2151838 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -437,7 +437,7 @@ bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const {
 MachineBasicBlock::iterator
 SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator Before) {
-  unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+  unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
   MachineInstr *Save =
       BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), SaveReg)
diff --git a/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir
index 9d70f67..057c663 100644
--- a/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir
+++ b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir
@@ -27,9 +27,9 @@
 # CHECK: S_NOP 0, implicit undef %5.sub0
 name: test0
 registers:
-  - { id: 0, class: sreg_32_xm0 }
-  - { id: 1, class: sreg_32_xm0 }
-  - { id: 2, class: sreg_32_xm0 }
+  - { id: 0, class: sreg_32 }
+  - { id: 1, class: sreg_32 }
+  - { id: 2, class: sreg_32 }
   - { id: 3, class: sreg_128 }
   - { id: 4, class: sreg_64 }
   - { id: 5, class: sreg_64 }
@@ -87,13 +87,13 @@ registers:
   - { id: 0, class: sreg_128 }
   - { id: 1, class: sreg_128 }
   - { id: 2, class: sreg_64 }
-  - { id: 3, class: sreg_32_xm0 }
+  - { id: 3, class: sreg_32 }
   - { id: 4, class: sreg_128 }
   - { id: 5, class: sreg_64 }
-  - { id: 6, class: sreg_32_xm0 }
-  - { id: 7, class: sreg_32_xm0 }
+  - { id: 6, class: sreg_32 }
+  - { id: 7, class: sreg_32 }
   - { id: 8, class: sreg_64 }
-  - { id: 9, class: sreg_32_xm0 }
+  - { id: 9, class: sreg_32 }
   - { id: 10, class: sreg_128 }
 body: |
   bb.0:
@@ -162,12 +162,12 @@ body: |
 
 name: test2
 registers:
-  - { id: 0, class: sreg_32_xm0 }
-  - { id: 1, class: sreg_32_xm0 }
+  - { id: 0, class: sreg_32 }
+  - { id: 1, class: sreg_32 }
   - { id: 2, class: sreg_64 }
   - { id: 3, class: sreg_128 }
-  - { id: 4, class: sreg_32_xm0 }
-  - { id: 5, class: sreg_32_xm0 }
+  - { id: 4, class: sreg_32 }
+  - { id: 5, class: sreg_32 }
   - { id: 6, class: sreg_64 }
   - { id: 7, class: sreg_128 }
   - { id: 8, class: sreg_64 }
@@ -260,7 +260,7 @@ body: |
 name: test5
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32_xm0 }
+  - { id: 0, class: sreg_32 }
   - { id: 1, class: sreg_64 }
 body: |
   bb.0:
@@ -286,9 +286,9 @@ body: |
 name: loop0
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32_xm0 }
-  - { id: 1, class: sreg_32_xm0 }
-  - { id: 2, class: sreg_32_xm0 }
+  - { id: 0, class: sreg_32 }
+  - { id: 1, class: sreg_32 }
+  - { id: 2, class: sreg_32 }
   - { id: 3, class: sreg_128 }
   - { id: 4, class: sreg_128 }
   - { id: 5, class: sreg_128 }
@@ -339,10 +339,10 @@ body: |
 name: loop1
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32_xm0 }
-  - { id: 1, class: sreg_32_xm0 }
-  - { id: 2, class: sreg_32_xm0 }
-  - { id: 3, class: sreg_32_xm0 }
+  - { id: 0, class: sreg_32 }
+  - { id: 1, class: sreg_32 }
+  - { id: 2, class: sreg_32 }
+  - { id: 3, class: sreg_32 }
   - { id: 4, class: sreg_128 }
   - { id: 5, class: sreg_128 }
   - { id: 6, class: sreg_128 }
@@ -390,7 +390,7 @@ body: |
 name: loop2
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32_xm0 }
+  - { id: 0, class: sreg_32 }
   - { id: 1, class: sreg_128 }
   - { id: 2, class: sreg_128 }
   - { id: 3, class: sreg_128 }
diff --git a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
index 1bcbd14..3c0bb75 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
@@ -22,11 +22,10 @@ entry:
   ret void
 }
 
-; FIXME: Should be able to avoid copy
 ; GCN-LABEL: {{^}}inline_sreg_constraint_m0:
 ; GCN: s_mov_b32 m0, -1
-; GCN: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
-; GCN: ; use [[COPY_M0]]
+; GCN-NOT: s_mov_b32 s{{[0-9]+}}, m0
+; GCN: ; use m0
 define void @inline_sreg_constraint_m0() {
   %m0 = tail call i32 asm sideeffect "s_mov_b32 m0, -1", "={M0}"()
   tail call void asm sideeffect "; use $0", "s"(i32 %m0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
index 2569108..09732ff 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
@@ -22,8 +22,7 @@ define void @test_readfirstlane_imm(i32 addrspace(1)* %out) #1 {
 ; TODO: m0 should be folded.
 ; CHECK-LABEL: {{^}}test_readfirstlane_m0:
 ; CHECK: s_mov_b32 m0, -1
-; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
-; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], [[COPY_M0]]
+; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
 ; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, [[VVAL]]
 define void @test_readfirstlane_m0(i32 addrspace(1)* %out) #1 {
   %m0 = call i32 asm "s_mov_b32 m0, -1", "={M0}"()
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
index a9d52b0..923cd72 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
@@ -22,8 +22,7 @@ define void @test_readlane_imm_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
 ; TODO: m0 should be folded.
 ; CHECK-LABEL: {{^}}test_readlane_m0_sreg:
 ; CHECK: s_mov_b32 m0, -1
-; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
-; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], [[COPY_M0]]
+; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
 ; CHECK: v_readlane_b32 s{{[0-9]+}}, [[VVAL]], s{{[0-9]+}}
 define void @test_readlane_m0_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
   %m0 = call i32 asm "s_mov_b32 m0, -1", "={M0}"()
diff --git a/llvm/test/CodeGen/AMDGPU/read_register.ll b/llvm/test/CodeGen/AMDGPU/read_register.ll
index 601a0ad..58a9e34 100644
--- a/llvm/test/CodeGen/AMDGPU/read_register.ll
+++ b/llvm/test/CodeGen/AMDGPU/read_register.ll
@@ -3,11 +3,9 @@
 declare i32 @llvm.read_register.i32(metadata) #0
 declare i64 @llvm.read_register.i64(metadata) #0
 
-; FIXME: Should be able to eliminate copy
 ; CHECK-LABEL: {{^}}test_read_m0:
 ; CHECK: s_mov_b32 m0, -1
-; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
-; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], [[COPY_M0]]
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], m0
 ; CHECK: buffer_store_dword [[COPY]]
 define void @test_read_m0(i32 addrspace(1)* %out) #0 {
   store volatile i32 0, i32 addrspace(3)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
index 548735f..c5ef75e 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -9,39 +9,38 @@
 ; GCN-LABEL: {{^}}spill_m0:
 ; TOSMEM: s_mov_b32 s84, SCRATCH_RSRC_DWORD0
 
-; GCN-DAG: s_cmp_lg_u32
+; GCN: s_cmp_lg_u32
 
-; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
-; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0
+; TOVGPR: s_mov_b32 vcc_hi, m0
+; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], vcc_hi, 0
 
-; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
-; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]]
+; TOVMEM: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], m0
 ; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill
 ; TOVMEM: s_waitcnt vmcnt(0)
 
-; TOSMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
+; TOSMEM: s_mov_b32 vcc_hi, m0
 ; TOSMEM: s_mov_b32 m0, s3{{$}}
-; TOSMEM-NOT: [[M0_COPY]]
-; TOSMEM: s_buffer_store_dword [[M0_COPY]], s[84:87], m0 ; 4-byte Folded Spill
+; TOSMEM-NOT: vcc_hi
+; TOSMEM: s_buffer_store_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Spill
 ; TOSMEM: s_waitcnt lgkmcnt(0)
 
 ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
 
 ; GCN: [[ENDIF]]:
-; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 0
-; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]]
+; TOVGPR: v_readlane_b32 vcc_hi, [[SPILL_VREG]], 0
+; TOVGPR: s_mov_b32 m0, vcc_hi
 
 ; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Reload
 ; TOVMEM: s_waitcnt vmcnt(0)
-; TOVMEM: v_readfirstlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]]
-; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]
+; TOVMEM: v_readfirstlane_b32 vcc_hi, [[RELOAD_VREG]]
+; TOVMEM: s_mov_b32 m0, vcc_hi
 
 ; TOSMEM: s_mov_b32 m0, s3{{$}}
-; TOSMEM: s_buffer_load_dword [[M0_RESTORE:s[0-9]+]], s[84:87], m0 ; 4-byte Folded Reload
-; TOSMEM-NOT: [[M0_RESTORE]]
-; TOSMEM: s_mov_b32 m0, [[M0_RESTORE]]
+; TOSMEM: s_buffer_load_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Reload
+; TOSMEM-NOT: vcc_hi
+; TOSMEM: s_mov_b32 m0, vcc_hi
 
-; GCN: s_add_i32 s{{[0-9]+}}, m0, 1
+; GCN: s_add_i32 m0, m0, 1
 define void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
 entry:
   %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={M0}"() #0
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir
index 0c08deb..016a6e6 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir
@@ -6,14 +6,14 @@
 name: phi_visit_order
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32_xm0 }
+  - { id: 0, class: sreg_32 }
   - { id: 1, class: sreg_64 }
-  - { id: 2, class: sreg_32_xm0 }
+  - { id: 2, class: sreg_32 }
   - { id: 7, class: vgpr_32 }
-  - { id: 8, class: sreg_32_xm0 }
+  - { id: 8, class: sreg_32 }
   - { id: 9, class: vgpr_32 }
   - { id: 10, class: sreg_64 }
-  - { id: 11, class: sreg_32_xm0 }
+  - { id: 11, class: sreg_32 }
 body: |
   ; GCN-LABEL: name: phi_visit_order