From 43160e7af2451111917b10348e5a8dabcd5f80d0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2014 17:13:57 +0000 Subject: [PATCH] R600/SI: Add intrinsics for brev instructions llvm-svn: 211187 --- llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 4 ++++ llvm/lib/Target/R600/AMDGPUISelLowering.h | 1 + llvm/lib/Target/R600/AMDGPUInstrInfo.td | 2 ++ llvm/lib/Target/R600/AMDGPUIntrinsics.td | 1 + llvm/lib/Target/R600/SIInstrInfo.cpp | 1 + llvm/lib/Target/R600/SIInstructions.td | 4 +++- llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll | 27 +++++++++++++++++++++++++++ 7 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp index 1e6f38f..ac5d790 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp @@ -900,6 +900,9 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_brev: + return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDIL_exp: // Legacy name. return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); @@ -2026,6 +2029,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) NODE_NAME_CASE(BFM) + NODE_NAME_CASE(BREV) NODE_NAME_CASE(MUL_U24) NODE_NAME_CASE(MUL_I24) NODE_NAME_CASE(MAD_U24) diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.h b/llvm/lib/Target/R600/AMDGPUISelLowering.h index 34e36d8..5be3070 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.h +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.h @@ -188,6 +188,7 @@ enum { BFE_I32, // Extract range of bits with sign extension to 32-bits. BFI, // (src0 & src1) | (~src0 & src2) BFM, // Insert a range of bits into a 32-bit word. + BREV, // Reverse bits. MUL_U24, MUL_I24, MAD_U24, diff --git a/llvm/lib/Target/R600/AMDGPUInstrInfo.td b/llvm/lib/Target/R600/AMDGPUInstrInfo.td index 5cb2559..942a9e8 100644 --- a/llvm/lib/Target/R600/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/R600/AMDGPUInstrInfo.td @@ -105,6 +105,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>; def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; +def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>; + // Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when // performing the mulitply. The result is a 32-bit value. def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp, diff --git a/llvm/lib/Target/R600/AMDGPUIntrinsics.td b/llvm/lib/Target/R600/AMDGPUIntrinsics.td index 4ef23ab..6dc7612 100644 --- a/llvm/lib/Target/R600/AMDGPUIntrinsics.td +++ b/llvm/lib/Target/R600/AMDGPUIntrinsics.td @@ -64,6 +64,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_barrier_local : Intrinsic<[], [], []>; def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; } diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp index e06dd1d..f5b82d5 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.cpp +++ b/llvm/lib/Target/R600/SIInstrInfo.cpp @@ -664,6 +664,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; + case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 61cd2be..428e49c 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -101,7 +101,9 @@ def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", >; def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>; def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>; -def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>; +def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", + [(set i32:$dst, (AMDGPUbrev i32:$src0))] +>; def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; } // End neverHasSideEffects = 1 diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll new file mode 100644 index 0000000..68a5ad0 --- /dev/null +++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone + +; FUNC-LABEL: @s_brev_i32: +; SI: S_LOAD_DWORD [[VAL:s[0-9]+]], +; SI: S_BREV_B32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: BUFFER_STORE_DWORD [[VRESULT]], +; SI: S_ENDPGM +define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_brev_i32: +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], +; SI: V_BFREV_B32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM +define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32 addrspace(1)* %valptr, align 4 + %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone + store i32 %ctlz, i32 addrspace(1)* %out, align 4 + ret void +} -- 2.7.4