From: Matt Arsenault Date: Mon, 18 Jul 2016 18:35:05 +0000 (+0000) Subject: AMDGPU: Add intrinsic for s_flbit_i32/v_ffbh_i32 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c96e1deffa3d4294483ef70bde0f781369afa770;p=platform%2Fupstream%2Fllvm.git AMDGPU: Add intrinsic for s_flbit_i32/v_ffbh_i32 llvm-svn: 275871 --- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 9bf2a4d..387335c 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -174,6 +174,11 @@ def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">, [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] >; +// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz +// should be used. +def int_amdgcn_sffbh : + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; + // TODO: Do we want an ordering for these? def int_amdgcn_atomic_inc : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, LLVMMatchType<0>], diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index c2c7585..f937694 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -266,6 +266,7 @@ enum NodeType : unsigned { BFI, // (src0 & src1) | (~src0 & src2) BFM, // Insert a range of bits into a 32-bit word. FFBH_U32, // ctlz with -1 if input is zero. 
+ FFBH_I32, MUL_U24, MUL_I24, MAD_U24, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 2b13bb9..cf087b7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -202,6 +202,7 @@ def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>; +def AMDGPUffbh_i32 : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>; // Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when // performing the mulitply. The result is a 32-bit value. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td index 2127391..7017d66 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -16,6 +16,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; def int_AMDGPU_kilp : Intrinsic<[], [], []>; + + // Deprecated in favor of llvm.amdgcn.sffbh def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; // Deprecated in favor of separate int_amdgcn_cube* intrinsics. diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 51241cf..033b940 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1896,6 +1896,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0, Denominator, Numerator); } + case Intrinsic::amdgcn_sffbh: + case AMDGPUIntrinsic::AMDGPU_flbit_i32: // Legacy name. 
+ return DAG.getNode(AMDGPUISD::FFBH_I32, DL, VT, Op.getOperand(1)); default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 6427db8..9c1b834 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -139,7 +139,7 @@ defm S_FLBIT_I32_B32 : SOP1_32 , "s_flbit_i32_b32", defm S_FLBIT_I32_B64 : SOP1_32_64 , "s_flbit_i32_b64", []>; defm S_FLBIT_I32 : SOP1_32 , "s_flbit_i32", - [(set i32:$sdst, (int_AMDGPU_flbit_i32 i32:$src0))] + [(set i32:$sdst, (AMDGPUffbh_i32 i32:$src0))] >; defm S_FLBIT_I32_I64 : SOP1_32_64 , "s_flbit_i32_i64", []>; defm S_SEXT_I32_I8 : SOP1_32 , "s_sext_i32_i8", diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.flbit.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.flbit.i32.ll deleted file mode 100644 index 20c7af8..0000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.flbit.i32.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s - -declare i32 @llvm.AMDGPU.flbit.i32(i32) nounwind readnone - -; FUNC-LABEL: {{^}}s_flbit: -; SI: s_load_dword [[VAL:s[0-9]+]], -; SI: s_flbit_i32 [[SRESULT:s[0-9]+]], [[VAL]] -; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] -; SI: buffer_store_dword [[VRESULT]], -; SI: s_endpgm -define void @s_flbit(i32 addrspace(1)* noalias %out, i32 %val) nounwind { - %r = call i32 @llvm.AMDGPU.flbit.i32(i32 %val) nounwind readnone - store i32 %r, i32 addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}v_flbit: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_ffbh_i32_e32 [[RESULT:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm -define void @v_flbit(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %val = 
load i32, i32 addrspace(1)* %valptr, align 4 - %r = call i32 @llvm.AMDGPU.flbit.i32(i32 %val) nounwind readnone - store i32 %r, i32 addrspace(1)* %out, align 4 - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll new file mode 100644 index 0000000..a348af2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.sffbh.i32(i32) #1 +declare i32 @llvm.AMDGPU.flbit.i32(i32) #1 + +; GCN-LABEL: {{^}}s_flbit: +; GCN: s_load_dword [[VAL:s[0-9]+]], +; GCN: s_flbit_i32 [[SRESULT:s[0-9]+]], [[VAL]] +; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; GCN: buffer_store_dword [[VRESULT]], +define void @s_flbit(i32 addrspace(1)* noalias %out, i32 %val) #0 { + %r = call i32 @llvm.amdgcn.sffbh.i32(i32 %val) + store i32 %r, i32 addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_flbit: +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_ffbh_i32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[RESULT]], +define void @v_flbit(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 { + %val = load i32, i32 addrspace(1)* %valptr, align 4 + %r = call i32 @llvm.amdgcn.sffbh.i32(i32 %val) + store i32 %r, i32 addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}legacy_s_flbit: +; GCN: s_load_dword [[VAL:s[0-9]+]], +; GCN: s_flbit_i32 [[SRESULT:s[0-9]+]], [[VAL]] +; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; GCN: buffer_store_dword [[VRESULT]], +; GCN: s_endpgm +define void @legacy_s_flbit(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %r = call i32 @llvm.AMDGPU.flbit.i32(i32 %val) nounwind readnone + store i32 %r, i32 addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}legacy_v_flbit: +; GCN: 
buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_ffbh_i32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm +define void @legacy_v_flbit(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32, i32 addrspace(1)* %valptr, align 4 + %r = call i32 @llvm.AMDGPU.flbit.i32(i32 %val) nounwind readnone + store i32 %r, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone }