From: Matt Arsenault
Date: Sat, 17 Sep 2016 02:02:19 +0000 (+0000)
Subject: AMDGPU: Use i64 scalar compare instructions
X-Git-Tag: llvmorg-4.0.0-rc1~9499
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7b1dc2c9834fb98455159e49eded58536d0f524a;p=platform%2Fupstream%2Fllvm.git

AMDGPU: Use i64 scalar compare instructions

VI added eq/ne for i64, so use them.

llvm-svn: 281800
---
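
The pattern this enables is a uniform 64-bit equality compare feeding a
branch. As a sketch (adapted from the tests added below; the function and
value names are illustrative, not part of the change):

    define void @br_scc_eq_i64(i64 %cond, i32 addrspace(1)* %out) {
    entry:
      ; Both operands are scalar (SGPR) values, so the compare is uniform
      ; and can set SCC instead of producing a VCC mask.
      %cmp0 = icmp eq i64 %cond, 4
      br i1 %cmp0, label %endif, label %if

    if:
      call void asm sideeffect "", ""()
      br label %endif

    endif:
      store volatile i32 1, i32 addrspace(1)* %out
      ret void
    }

Per the CHECK lines in the tests below, VI now selects s_cmp_eq_u64 plus an
s_cbranch_scc* branch for this, while SI still falls back to v_cmp_eq_i64 on
the VALU; only eq/ne qualify because those are the only 64-bit scalar
compares VI added (see hasScalarCompareEq64 and isCBranchSCC below).
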
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 89215c1..951db65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -38,18 +38,6 @@ class R600InstrInfo;
 
 namespace {
 
-static bool isCBranchSCC(const SDNode *N) {
-  assert(N->getOpcode() == ISD::BRCOND);
-  if (!N->hasOneUse())
-    return false;
-
-  SDValue Cond = N->getOperand(1);
-  if (Cond.getOpcode() == ISD::CopyToReg)
-    Cond = Cond.getOperand(2);
-  return Cond.getOpcode() == ISD::SETCC &&
-         Cond.getOperand(0).getValueType() == MVT::i32 && Cond.hasOneUse();
-}
-
 /// AMDGPU specific code to select AMDGPU machine instructions for
 /// SelectionDAG operations.
 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
@@ -150,6 +138,7 @@ private:
                        uint32_t Offset, uint32_t Width);
   void SelectS_BFEFromShifts(SDNode *N);
   void SelectS_BFE(SDNode *N);
+  bool isCBranchSCC(const SDNode *N) const;
   void SelectBRCOND(SDNode *N);
   void SelectATOMIC_CMP_SWAP(SDNode *N);
 
@@ -1337,6 +1326,32 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
   SelectCode(N);
 }
 
+bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
+  assert(N->getOpcode() == ISD::BRCOND);
+  if (!N->hasOneUse())
+    return false;
+
+  SDValue Cond = N->getOperand(1);
+  if (Cond.getOpcode() == ISD::CopyToReg)
+    Cond = Cond.getOperand(2);
+
+  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
+    return false;
+
+  MVT VT = Cond.getOperand(0).getSimpleValueType();
+  if (VT == MVT::i32)
+    return true;
+
+  if (VT == MVT::i64) {
+    auto ST = static_cast<const SISubtarget *>(Subtarget);
+
+    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
+  }
+
+  return false;
+}
+
 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
   SDValue Cond = N->getOperand(1);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 6b953ea..1e52e1c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -488,6 +488,10 @@ public:
     return Has16BitInsts;
   }
 
+  bool hasScalarCompareEq64() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
   bool enableSIScheduler() const {
     return EnableSIScheduler;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9ca44b8..5426f7f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1930,6 +1930,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
   case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
   case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
   case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
+  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
+  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
   case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
   case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
   case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 4451d16..9744cd3 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -630,6 +630,14 @@ class SOPC_CMP_32 <bits<7> op, string opName,
   let isCommutable = 1;
 }
 
+class SOPC_CMP_64 <bits<7> op, string opName,
+                   PatLeaf cond = COND_NULL, string revOp = opName>
+  : SOPC_Helper<op, SSrc_64, i64, opName, cond>,
+    Commutable_REV<revOp, !eq(revOp, opName)> {
+  let isCompare = 1;
+  let isCommutable = 1;
+}
+
 class SOPC_32 <bits<7> op, string opName, list<dag> pattern = []>
   : SOPC_Base<op, SSrc_32, SSrc_32, opName, pattern>;
 
@@ -655,6 +663,10 @@ def S_BITCMP0_B64 : SOPC_64_32 <0x0e, "s_bitcmp0_b64">;
 def S_BITCMP1_B64 : SOPC_64_32 <0x0f, "s_bitcmp1_b64">;
 def S_SETVSKIP : SOPC_32 <0x10, "s_setvskip">;
 
+let SubtargetPredicate = isVI in {
+def S_CMP_EQ_U64 : SOPC_CMP_64 <0x12, "s_cmp_eq_u64", COND_EQ>;
+def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>;
+}
 
 //===----------------------------------------------------------------------===//
 // SOPP Instructions
diff --git a/llvm/test/CodeGen/AMDGPU/sopk-compares.ll b/llvm/test/CodeGen/AMDGPU/sopk-compares.ll
index 570ffb2..1dda60a 100644
--- a/llvm/test/CodeGen/AMDGPU/sopk-compares.ll
+++ b/llvm/test/CodeGen/AMDGPU/sopk-compares.ll
@@ -569,5 +569,81 @@ endif:
   ret void
 }
 
+; GCN-LABEL: {{^}}br_scc_eq_i64_inline_imm:
+; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 4
+
+; SI: v_cmp_eq_i64_e64
+define void @br_scc_eq_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i64 %cond, 4
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i64_simm16:
+; VI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x4d2
+; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0
+; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+
+; SI: v_cmp_eq_i64_e32
+define void @br_scc_eq_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp eq i64 %cond, 1234
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ne_i64_inline_imm:
+; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 4
+
+; SI: v_cmp_ne_i64_e64
+define void @br_scc_ne_i64_inline_imm(i64 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ne i64 %cond, 4
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ne_i64_simm16:
+; VI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x4d2
+; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0
+; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+
+; SI: v_cmp_ne_i64_e32
+define void @br_scc_ne_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+  %cmp0 = icmp ne i64 %cond, 1234
+  br i1 %cmp0, label %endif, label %if
+
+if:
+  call void asm sideeffect "", ""()
+  br label %endif
+
+endif:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
index 668a6336..521e189 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-; SI-LABEL: {{^}}uniform_if_scc:
-; SI-DAG: s_cmp_eq_i32 s{{[0-9]+}}, 0
-; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
-; SI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
+; GCN-LABEL: {{^}}uniform_if_scc:
+; GCN-DAG: s_cmp_eq_i32 s{{[0-9]+}}, 0
+; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
 
 ; Fall-through to the else
-; SI: v_mov_b32_e32 [[STORE_VAL]], 1
+; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
 
-; SI: [[IF_LABEL]]:
-; SI: buffer_store_dword [[STORE_VAL]]
+; GCN: [[IF_LABEL]]:
+; GCN: buffer_store_dword [[STORE_VAL]]
 define void @uniform_if_scc(i32 %cond, i32 addrspace(1)* %out) {
 entry:
   %cmp0 = icmp eq i32 %cond, 0
@@ -28,19 +28,19 @@ done:
   ret void
 }
 
-; SI-LABEL: {{^}}uniform_if_vcc:
+; GCN-LABEL: {{^}}uniform_if_vcc:
 ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
 ; also scheduled the write first.
-; SI-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; SI-DAG: s_and_b64 vcc, exec, [[COND]]
-; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
-; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
+; GCN-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
+; GCN-DAG: s_and_b64 vcc, exec, [[COND]]
+; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
 
 ; Fall-through to the else
-; SI: v_mov_b32_e32 [[STORE_VAL]], 1
+; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
 
-; SI: [[IF_LABEL]]:
-; SI: buffer_store_dword [[STORE_VAL]]
+; GCN: [[IF_LABEL]]:
+; GCN: buffer_store_dword [[STORE_VAL]]
 define void @uniform_if_vcc(float %cond, i32 addrspace(1)* %out) {
 entry:
   %cmp0 = fcmp oeq float %cond, 0.0
@@ -58,16 +58,16 @@ done:
   ret void
 }
 
-; SI-LABEL: {{^}}uniform_if_swap_br_targets_scc:
-; SI-DAG: s_cmp_lg_i32 s{{[0-9]+}}, 0
-; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
-; SI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
+; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc:
+; GCN-DAG: s_cmp_lg_i32 s{{[0-9]+}}, 0
+; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
 
 ; Fall-through to the else
-; SI: v_mov_b32_e32 [[STORE_VAL]], 1
+; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
 
-; SI: [[IF_LABEL]]:
-; SI: buffer_store_dword [[STORE_VAL]]
+; GCN: [[IF_LABEL]]:
+; GCN: buffer_store_dword [[STORE_VAL]]
 define void @uniform_if_swap_br_targets_scc(i32 %cond, i32 addrspace(1)* %out) {
 entry:
   %cmp0 = icmp eq i32 %cond, 0
@@ -85,19 +85,19 @@ done:
   ret void
 }
 
-; SI-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
+; GCN-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
 ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
 ; also scheduled the write first.
-; SI-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; SI-DAG: s_and_b64 vcc, exec, [[COND]]
-; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
-; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
+; GCN-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
+; GCN-DAG: s_and_b64 vcc, exec, [[COND]]
+; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
 
 ; Fall-through to the else
-; SI: v_mov_b32_e32 [[STORE_VAL]], 1
+; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
 
-; SI: [[IF_LABEL]]:
-; SI: buffer_store_dword [[STORE_VAL]]
+; GCN: [[IF_LABEL]]:
+; GCN: buffer_store_dword [[STORE_VAL]]
 define void @uniform_if_swap_br_targets_vcc(float %cond, i32 addrspace(1)* %out) {
 entry:
   %cmp0 = fcmp oeq float %cond, 0.0
@@ -115,16 +115,16 @@ done:
   ret void
 }
 
-; SI-LABEL: {{^}}uniform_if_move_valu:
-; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
+; GCN-LABEL: {{^}}uniform_if_move_valu:
+; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
 ; Using a floating-point value in an integer compare will cause the compare to
 ; be selected for the SALU and then later moved to the VALU.
-; SI: v_cmp_ne_i32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
-; SI: s_and_b64 vcc, exec, [[COND]]
-; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
-; SI: buffer_store_dword
-; SI: [[ENDIF_LABEL]]:
-; SI: s_endpgm
+; GCN: v_cmp_ne_i32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
+; GCN: s_and_b64 vcc, exec, [[COND]]
+; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
+; GCN: buffer_store_dword
+; GCN: [[ENDIF_LABEL]]:
+; GCN: s_endpgm
 define void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) {
 entry:
   %a.0 = fadd float %a, 10.0
@@ -140,16 +140,16 @@ endif:
   ret void
 }
 
-; SI-LABEL: {{^}}uniform_if_move_valu_commute:
-; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
+; GCN-LABEL: {{^}}uniform_if_move_valu_commute:
+; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
 ; Using a floating-point value in an integer compare will cause the compare to
 ; be selected for the SALU and then later moved to the VALU.
-; SI: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
-; SI: s_and_b64 vcc, exec, [[COND]]
-; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
-; SI: buffer_store_dword
-; SI: [[ENDIF_LABEL]]:
-; SI: s_endpgm
+; GCN: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
+; GCN: s_and_b64 vcc, exec, [[COND]]
+; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
+; GCN: buffer_store_dword
+; GCN: [[ENDIF_LABEL]]:
+; GCN: s_endpgm
 define void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) {
 entry:
   %a.0 = fadd float %a, 10.0
@@ -166,18 +166,18 @@ endif:
 }
 
 
-; SI-LABEL: {{^}}uniform_if_else_ret:
-; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
-; SI-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
+; GCN-LABEL: {{^}}uniform_if_else_ret:
+; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
+; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
 
-; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
-; SI: buffer_store_dword [[TWO]]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
+; GCN: buffer_store_dword [[TWO]]
+; GCN: s_endpgm
 
-; SI: {{^}}[[IF_LABEL]]:
-; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
-; SI: buffer_store_dword [[ONE]]
-; SI: s_endpgm
+; GCN: {{^}}[[IF_LABEL]]:
+; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
+; GCN: buffer_store_dword [[ONE]]
+; GCN: s_endpgm
 define void @uniform_if_else_ret(i32 addrspace(1)* nocapture %out, i32 %a) {
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -195,22 +195,22 @@ if.end:                                           ; preds = %if.else, %if.then
   ret void
 }
 
-; SI-LABEL: {{^}}uniform_if_else:
-; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
-; SI-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
+; GCN-LABEL: {{^}}uniform_if_else:
+; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
+; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
 
-; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
-; SI: buffer_store_dword [[TWO]]
-; SI: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
+; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
+; GCN: buffer_store_dword [[TWO]]
+; GCN: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
 
-; SI: [[IF_LABEL]]:
-; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
-; SI: buffer_store_dword [[ONE]]
+; GCN: [[IF_LABEL]]:
+; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
+; GCN: buffer_store_dword [[ONE]]
 
-; SI: [[ENDIF_LABEL]]:
-; SI: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
-; SI: buffer_store_dword [[THREE]]
-; SI: s_endpgm
+; GCN: [[ENDIF_LABEL]]:
+; GCN: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
+; GCN: buffer_store_dword [[THREE]]
+; GCN: s_endpgm
 define void @uniform_if_else(i32 addrspace(1)* nocapture %out0, i32 addrspace(1)* nocapture %out1, i32 %a) {
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -229,12 +229,12 @@ if.end:                                           ; preds = %if.else, %if.then
   ret void
 }
 
-; SI-LABEL: {{^}}icmp_2_users:
-; SI: s_cmp_lt_i32 s{{[0-9]+}}, 1
-; SI: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
-; SI: buffer_store_dword
-; SI: [[LABEL]]:
-; SI: s_endpgm
+; GCN-LABEL: {{^}}icmp_2_users:
+; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 1
+; GCN: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
+; GCN: buffer_store_dword
+; GCN: [[LABEL]]:
+; GCN: s_endpgm
 define void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {
 main_body:
   %0 = icmp sgt i32 %cond, 0
@@ -249,16 +249,16 @@ ENDIF:                                            ; preds = %IF, %main_body
   ret void
 }
 
-; SI-LABEL: {{^}}icmp_users_different_blocks:
-; SI: s_load_dword [[COND:s[0-9]+]]
-; SI: s_cmp_lt_i32 [[COND]], 1
-; SI: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
-; SI: v_cmp_gt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[COND]], 0{{$}}
-; SI: s_and_b64 vcc, exec, [[MASK]]
-; SI: s_cbranch_vccnz [[EXIT]]
-; SI: buffer_store
-; SI: {{^}}[[EXIT]]:
-; SI: s_endpgm
+; GCN-LABEL: {{^}}icmp_users_different_blocks:
+; GCN: s_load_dword [[COND:s[0-9]+]]
+; GCN: s_cmp_lt_i32 [[COND]], 1
+; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
+; GCN: v_cmp_gt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[COND]], 0{{$}}
+; GCN: s_and_b64 vcc, exec, [[MASK]]
+; GCN: s_cbranch_vccnz [[EXIT]]
+; GCN: buffer_store
+; GCN: {{^}}[[EXIT]]:
+; GCN: s_endpgm
 define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
@@ -279,15 +279,15 @@ bb9:                                              ; preds = %bb8, %bb4
   ret void
 }
 
-; SI-LABEL: {{^}}uniform_loop:
-; SI: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
-; FIXME: We need to teach SIFixSGPRCopies about uniform branches so we
+; GCN-LABEL: {{^}}uniform_loop:
+; GCN: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
+; FIXME: We need to teach GCNFixSGPRCopies about uniform branches so we
 ; get s_add_i32 here.
-; SI: v_add_i32_e32 [[I:v[0-9]+]], vcc, -1, v{{[0-9]+}}
-; SI: v_cmp_ne_i32_e32 vcc, 0, [[I]]
-; SI: s_and_b64 vcc, exec, vcc
-; SI: s_cbranch_vccnz [[LOOP_LABEL]]
-; SI: s_endpgm
+; GCN: v_add_i32_e32 [[I:v[0-9]+]], vcc, -1, v{{[0-9]+}}
+; GCN: v_cmp_ne_i32_e32 vcc, 0, [[I]]
+; GCN: s_and_b64 vcc, exec, vcc
+; GCN: s_cbranch_vccnz [[LOOP_LABEL]]
+; GCN: s_endpgm
 define void @uniform_loop(i32 addrspace(1)* %out, i32 %a) {
 entry:
   br label %loop
@@ -304,15 +304,15 @@ done:
 
 ; Test uniform and divergent.
 
-; SI-LABEL: {{^}}uniform_inside_divergent:
-; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
-; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
-; SI: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
-; SI: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
-; SI: s_cmp_lg_i32 {{s[0-9]+}}, 0
-; SI: s_cbranch_scc1 [[ENDIF_LABEL]]
-; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
-; SI: buffer_store_dword [[ONE]]
+; GCN-LABEL: {{^}}uniform_inside_divergent:
+; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
+; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
+; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
+; GCN: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
+; GCN: s_cmp_lg_i32 {{s[0-9]+}}, 0
+; GCN: s_cbranch_scc1 [[ENDIF_LABEL]]
+; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
+; GCN: buffer_store_dword [[ONE]]
 define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
@@ -332,16 +332,16 @@ endif:
   ret void
 }
 
-; SI-LABEL: {{^}}divergent_inside_uniform:
-; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
-; SI: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
-; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
-; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
-; SI: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
-; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
-; SI: buffer_store_dword [[ONE]]
-; SI: [[ENDIF_LABEL]]:
-; SI: s_endpgm
+; GCN-LABEL: {{^}}divergent_inside_uniform:
+; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
+; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
+; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
+; GCN: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
+; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
+; GCN: buffer_store_dword [[ONE]]
+; GCN: [[ENDIF_LABEL]]:
+; GCN: s_endpgm
 define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
 entry:
   %u_cmp = icmp eq i32 %cond, 0
@@ -361,19 +361,19 @@ endif:
   ret void
 }
 
-; SI-LABEL: {{^}}divergent_if_uniform_if:
-; SI: v_cmp_eq_i32_e32 vcc, 0, v0
-; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
-; SI: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
-; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
-; SI: buffer_store_dword [[ONE]]
-; SI: s_or_b64 exec, exec, [[MASK]]
-; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
-; SI: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]
-; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
-; SI: buffer_store_dword [[TWO]]
-; SI: [[EXIT]]:
-; SI: s_endpgm
+; GCN-LABEL: {{^}}divergent_if_uniform_if:
+; GCN: v_cmp_eq_i32_e32 vcc, 0, v0
+; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
+; GCN: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
+; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
+; GCN: buffer_store_dword [[ONE]]
+; GCN: s_or_b64 exec, exec, [[MASK]]
+; GCN: s_cmp_lg_i32 s{{[0-9]+}}, 0
+; GCN: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]
+; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
+; GCN: buffer_store_dword [[TWO]]
+; GCN: [[EXIT]]:
+; GCN: s_endpgm
 define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
@@ -401,19 +401,19 @@ exit:
 ; the first, leaving an scc use in a different block than it was
 ; defed.
 
-; SI-LABEL: {{^}}cse_uniform_condition_different_blocks:
-; SI: s_load_dword [[COND:s[0-9]+]]
-; SI: s_cmp_lt_i32 [[COND]], 1
-; SI: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3
+; GCN-LABEL: {{^}}cse_uniform_condition_different_blocks:
+; GCN: s_load_dword [[COND:s[0-9]+]]
+; GCN: s_cmp_lt_i32 [[COND]], 1
+; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3
 
-; SI: BB#1:
-; SI-NOT: cmp
-; SI: buffer_load_dword
-; SI: buffer_store_dword
-; SI: s_cbranch_scc1 BB[[FNNUM]]_3
+; GCN: BB#1:
+; GCN-NOT: cmp
+; GCN: buffer_load_dword
+; GCN: buffer_store_dword
+; GCN: s_cbranch_scc1 BB[[FNNUM]]_3
 
-; SI: BB[[FNNUM]]_3:
-; SI: s_endpgm
+; GCN: BB[[FNNUM]]_3:
+; GCN: s_endpgm
 define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
@@ -434,6 +434,137 @@ bb9:                                              ; preds = %bb8, %bb4
   ret void
 }
 
+; GCN-LABEL: {{^}}uniform_if_scc_i64_eq:
+; VI-DAG: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
+; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+
+; SI: v_cmp_eq_i64_e64
+; SI: s_and_b64 vcc, exec,
+; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
+
+; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
+
+; Fall-through to the else
+; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
+
+; GCN: [[IF_LABEL]]:
+; GCN: buffer_store_dword [[STORE_VAL]]
+define void @uniform_if_scc_i64_eq(i64 %cond, i32 addrspace(1)* %out) {
+entry:
+  %cmp0 = icmp eq i64 %cond, 0
+  br i1 %cmp0, label %if, label %else
+
+if:
+  br label %done
+
+else:
+  br label %done
+
+done:
+  %value = phi i32 [0, %if], [1, %else]
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}uniform_if_scc_i64_ne:
+; VI-DAG: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
+; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+
+; SI: v_cmp_ne_i64_e64
+; SI: s_and_b64 vcc, exec,
+; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
+
+; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
+
+; Fall-through to the else
+; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
+
+; GCN: [[IF_LABEL]]:
+; GCN: buffer_store_dword [[STORE_VAL]]
+define void @uniform_if_scc_i64_ne(i64 %cond, i32 addrspace(1)* %out) {
+entry:
+  %cmp0 = icmp ne i64 %cond, 0
+  br i1 %cmp0, label %if, label %else
+
+if:
+  br label %done
+
+else:
+  br label %done
+
+done:
+  %value = phi i32 [0, %if], [1, %else]
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}uniform_if_scc_i64_sgt:
+; GCN-DAG: v_cmp_gt_i64_e64
+; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
+; GCN: s_and_b64 vcc, exec,
+; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
+
+; Fall-through to the else
+; GCN: v_mov_b32_e32 [[STORE_VAL]], 1
+
+; GCN: [[IF_LABEL]]:
+; GCN: buffer_store_dword [[STORE_VAL]]
+define void @uniform_if_scc_i64_sgt(i64 %cond, i32 addrspace(1)* %out) {
+entry:
+  %cmp0 = icmp sgt i64 %cond, 0
+  br i1 %cmp0, label %if, label %else
+
+if:
+  br label %done
+
+else:
+  br label %done
+
+done:
+  %value = phi i32 [0, %if], [1, %else]
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}move_to_valu_i64_eq:
+; SI: v_cmp_eq_i64_e32
+; VI: v_cmp_eq_u64_e32
+define void @move_to_valu_i64_eq(i32 addrspace(1)* %out) {
+  %cond = load volatile i64, i64 addrspace(3)* undef
+  %cmp0 = icmp eq i64 %cond, 0
+  br i1 %cmp0, label %if, label %else
+
+if:
+  br label %done
+
+else:
+  br label %done
+
+done:
+  %value = phi i32 [0, %if], [1, %else]
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}move_to_valu_i64_ne:
+; SI: v_cmp_ne_i64_e32
+; VI: v_cmp_ne_u64_e32
+define void @move_to_valu_i64_ne(i32 addrspace(1)* %out) {
+  %cond = load volatile i64, i64 addrspace(3)* undef
+  %cmp0 = icmp ne i64 %cond, 0
+  br i1 %cmp0, label %if, label %else
+
+if:
+  br label %done
+
+else:
+  br label %done
+
+done:
+  %value = phi i32 [0, %if], [1, %else]
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
-attributes #0 = { readnone }
+attributes #0 = { nounwind readnone }
diff --git a/llvm/test/MC/AMDGPU/sopc.s b/llvm/test/MC/AMDGPU/sopc.s
index b89488c..ef4351d 100644
--- a/llvm/test/MC/AMDGPU/sopc.s
+++ b/llvm/test/MC/AMDGPU/sopc.s
@@ -1,6 +1,6 @@
-// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN %s
-// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck --check-prefix=GCN %s
-// RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=GCN %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SICI %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=NOSICI %s
 
 //===----------------------------------------------------------------------===//
 // SOPC Instructions
@@ -56,3 +56,11 @@ s_bitcmp1_b64 s[2:3], s4
 
 s_setvskip s3, s5
 // GCN: s_setvskip s3, s5 ; encoding: [0x03,0x05,0x10,0xbf]
+
+s_cmp_eq_u64 s[0:1], s[2:3]
+// VI: s_cmp_eq_u64 s[0:1], s[2:3] ; encoding: [0x00,0x02,0x12,0xbf]
+// NOSICI: error: instruction not supported on this GPU
+
+s_cmp_lg_u64 s[0:1], s[2:3]
+// VI: s_cmp_lg_u64 s[0:1], s[2:3] ; encoding: [0x00,0x02,0x13,0xbf]
+// NOSICI: error: instruction not supported on this GPU