From: Changpeng Fang Date: Tue, 1 Mar 2016 17:51:23 +0000 (+0000) Subject: AMDGPU/SI: Implement DS_PERMUTE/DS_BPERMUTE Instruction Definitions and Intrinsics X-Git-Tag: llvmorg-3.9.0-rc1~12803 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=24f035af32effb159cc001feff198e070d2d8431;p=platform%2Fupstream%2Fllvm.git AMDGPU/SI: Implement DS_PERMUTE/DS_BPERMUTE Instruction Definitions and Intrinsics Summary: This patch implements DS_PERMUTE/DS_BPERMUTE instruction definitions and intrinsics, which are new since VI. Reviewers: tstellarAMD, arsenm Subscribers: llvm-commits, arsenm Differential Revision: http://reviews.llvm.org/D17614 llvm-svn: 262356 --- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 59a1525..b9e14fe 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -258,4 +258,13 @@ def int_amdgcn_s_dcache_wb_vol : def int_amdgcn_s_memrealtime : GCCBuiltin<"__builtin_amdgcn_s_memrealtime">, Intrinsic<[llvm_i64_ty], [], []>; + +// llvm.amdgcn.ds.permute +def int_amdgcn_ds_permute : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; + +// llvm.amdgcn.ds.bpermute +def int_amdgcn_ds_bpermute : + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; + } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 480f018..f1aa4cc 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -224,6 +224,10 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, // will use this for some partially aligned loads. const MachineOperand *Offset0Imm = getNamedOperand(*LdSt, AMDGPU::OpName::offset0); + // DS_PERMUTE does not have Offset0Imm (and Offset1Imm). 
+ if (!Offset0Imm) + return false; + const MachineOperand *Offset1Imm = getNamedOperand(*LdSt, AMDGPU::OpName::offset1); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 9e3cc81..094b34d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2409,6 +2409,23 @@ multiclass DS_1A1D_RET op, string opName, RegisterClass rc, } } +multiclass DS_1A1D_PERMUTE op, string opName, RegisterClass rc, + SDPatternOperator node = null_frag, + dag outs = (outs rc:$vdst), + dag ins = (ins VGPR_32:$addr, rc:$data0), + string asm = opName#" $vdst, $addr, $data0"> { + + let mayLoad = 0, mayStore = 0, isConvergent = 1 in { + def "" : DS_Pseudo ; + + let data1 = 0, offset0 = 0, offset1 = 0, gds = 0 in { + def "_vi" : DS_Real_vi ; + } + } +} + multiclass DS_1A2D_RET_m op, string opName, RegisterClass rc, string noRetOp = "", dag ins, dag outs = (outs rc:$vdst), diff --git a/llvm/lib/Target/AMDGPU/VIInstructions.td b/llvm/lib/Target/AMDGPU/VIInstructions.td index b998b8a..4b8ce64 100644 --- a/llvm/lib/Target/AMDGPU/VIInstructions.td +++ b/llvm/lib/Target/AMDGPU/VIInstructions.td @@ -136,4 +136,15 @@ def : Pat < (S_MEMREALTIME) >; +//===----------------------------------------------------------------------===// +// DS_PERMUTE/DS_BPERMUTE Instructions. 
+//===----------------------------------------------------------------------===// + +let Uses = [EXEC] in { +defm DS_PERMUTE_B32 : DS_1A1D_PERMUTE <0x3e, "ds_permute_b32", VGPR_32, + int_amdgcn_ds_permute>; +defm DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <0x3f, "ds_bpermute_b32", VGPR_32, + int_amdgcn_ds_bpermute>; +} + } // End Predicates = [isVI] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll new file mode 100644 index 0000000..45cd45d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s + +declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) #0 + +; FUNC-LABEL: {{^}}ds_bpermute: +; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind { + %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0 + store i32 %bpermute, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind readnone convergent } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll new file mode 100644 index 0000000..e217286 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s + +declare i32 @llvm.amdgcn.ds.permute(i32, i32) #0 + +; FUNC-LABEL: {{^}}ds_permute: +; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define void @ds_permute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind { + %bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0 + store i32 %bpermute, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind readnone convergent }