From 4842c052168157142c0d99dfbe9f35a84469b790 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Wed, 7 Jan 2015 20:27:25 +0000
Subject: [PATCH] R600/SI: Add a V_MOV_B64 pseudo instruction

This is used to simplify the SIFoldOperands pass and make it easier to
fold immediates.

llvm-svn: 225373
---
 llvm/lib/Target/R600/SIFoldOperands.cpp         |  1 +
 llvm/lib/Target/R600/SIInstrInfo.cpp            | 31 ++++++++++++++++++++++++
 llvm/lib/Target/R600/SIInstructions.td          |  6 +++++
 llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll | 10 ++++----
 llvm/test/CodeGen/R600/imm.ll                   |  7 ++----
 llvm/test/CodeGen/R600/local-atomics64.ll       | 32 ++++++++++---------------
 6 files changed, 57 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Target/R600/SIFoldOperands.cpp b/llvm/lib/Target/R600/SIFoldOperands.cpp
index 23d4a4d..ddb2852 100644
--- a/llvm/lib/Target/R600/SIFoldOperands.cpp
+++ b/llvm/lib/Target/R600/SIFoldOperands.cpp
@@ -86,6 +86,7 @@ static bool isSafeToFold(unsigned Opcode) {
   switch(Opcode) {
   case AMDGPU::V_MOV_B32_e32:
   case AMDGPU::V_MOV_B32_e64:
+  case AMDGPU::V_MOV_B64_PSEUDO:
   case AMDGPU::S_MOV_B32:
   case AMDGPU::S_MOV_B64:
   case AMDGPU::COPY:
diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp
index 08dd425..aa9e1a3 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -424,6 +424,8 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
     return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
   } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
     return AMDGPU::S_MOV_B64;
+  } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
+    return AMDGPU::V_MOV_B64_PSEUDO;
   }
   return AMDGPU::COPY;
 }
@@ -672,6 +674,35 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
     // This is just a placeholder for register allocation.
     MI->eraseFromParent();
     break;
+
+  case AMDGPU::V_MOV_B64_PSEUDO: {
+    unsigned Dst = MI->getOperand(0).getReg();
+    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+
+    const MachineOperand &SrcOp = MI->getOperand(1);
+    // FIXME: Will this work for 64-bit floating point immediates?
+    assert(!SrcOp.isFPImm());
+    if (SrcOp.isImm()) {
+      APInt Imm(64, SrcOp.getImm());
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
+        .addImm(Imm.getLoBits(32).getZExtValue())
+        .addReg(Dst, RegState::Implicit);
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
+        .addImm(Imm.getHiBits(32).getZExtValue())
+        .addReg(Dst, RegState::Implicit);
+    } else {
+      assert(SrcOp.isReg());
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
+        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
+        .addReg(Dst, RegState::Implicit);
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
+        .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
+        .addReg(Dst, RegState::Implicit);
+    }
+    MI->eraseFromParent();
+    break;
+  }
   }
   return true;
 }
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index c130a76..4a4c94c 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -1742,6 +1742,12 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
 //===----------------------------------------------------------------------===//
 let isCodeGenOnly = 1, isPseudo = 1 in {
 
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
+// pass to enable folding of inline immediates.
+def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>;
+} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
 let hasSideEffects = 1 in {
 def SGPR_USE : InstSI <(outs),(ins), "", []>;
 }
diff --git a/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll b/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll
index 8a5a3b6..0d5ece4 100644
--- a/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll
+++ b/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll
@@ -20,9 +20,8 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
 ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
-; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
-; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
+; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
 ; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
 ; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
 ; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
@@ -69,9 +68,8 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw
 ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
 ; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_mov_b64 s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
-; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
-; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
+; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
 ; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
 ; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
 ; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
diff --git a/llvm/test/CodeGen/R600/imm.ll b/llvm/test/CodeGen/R600/imm.ll
index 0b9d557..416462b 100644
--- a/llvm/test/CodeGen/R600/imm.ll
+++ b/llvm/test/CodeGen/R600/imm.ll
@@ -474,12 +474,9 @@ define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
 }
 
-; FIXME: These shoudn't bother materializing in SGPRs
-
 ; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64
-; CHECK: s_mov_b64 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]{{\]}}, 0{{$}}
-; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
-; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
+; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
+; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0
 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
 define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
   store double 0.0, double addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/local-atomics64.ll b/llvm/test/CodeGen/R600/local-atomics64.ll
index 0e5a94f..ce6ddbd 100644
--- a/llvm/test/CodeGen/R600/local-atomics64.ll
+++ b/llvm/test/CodeGen/R600/local-atomics64.ll
@@ -30,9 +30,8 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
 
 ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9
-; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
 ; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
 ; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
 ; SI: buffer_store_dwordx2 [[RESULT]],
@@ -45,9 +44,8 @@ define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
 }
 
 ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64:
-; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
 ; SI: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
 ; SI: buffer_store_dwordx2 [[RESULT]],
 ; SI: s_endpgm
@@ -87,9 +85,8 @@ define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
 }
 
 ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64:
-; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
 ; SI: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
 ; SI: buffer_store_dwordx2 [[RESULT]],
 ; SI: s_endpgm
@@ -277,10 +274,9 @@ define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
 
 ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9
-; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
 ; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
 ; SI: s_endpgm
 define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
@@ -290,9 +286,8 @@ define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }
 
 ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64:
-; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
 ; SI: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
 ; SI: s_endpgm
 define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
@@ -327,9 +322,8 @@ define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
 }
 
 ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
-; SI: s_mov_b64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
 ; SI: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
 ; SI: s_endpgm
 define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
-- 
2.7.4