From 234fcb81d3e48dd46fc6f610df77511883b69fd8 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Thu, 17 Mar 2016 16:43:50 +0000 Subject: [PATCH] AMDGPU/SI: Do not generate s_waitcnt after ds_permute/ds_bpermute Summary: ds_permute/ds_bpermute do not read memory so s_waitcnt is not needed. Reviewers: arsenm, tstellarAMD Subscribers: llvm-commits, arsenm Differential Revision: http://reviews.llvm.org/D18197 llvm-svn: 263720 --- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +- .../test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll | 24 +++++++++++++++++++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll | 28 ++++++++++++++++++++-- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 24e4b3d..2a1ce12 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2519,7 +2519,7 @@ multiclass DS_1A1D_PERMUTE op, string opName, RegisterClass rc, dag ins = (ins VGPR_32:$addr, rc:$data0), string asm = opName#" $vdst, $addr, $data0"> { - let mayLoad = 0, mayStore = 0, isConvergent = 1 in { + let LGKM_CNT = 0, mayLoad = 0, mayStore = 0, isConvergent = 1 in { def "" : DS_Pseudo ; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll index 45cd45d..d9c87b1 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll @@ -10,4 +10,28 @@ define void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind ret void } +; FUNC-LABEL: {{^}}bpermute_no_waitcnt_test: +; CHECK: s_cbranch_scc1 +; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; CHECK-NOT: s_waitcnt +define void @bpermute_no_waitcnt_test(i32 addrspace(1)* %out, i32 %cond) { +entry: + + %tmp = icmp eq i32 %cond, 0 + br i1 %tmp, label %if, label %else + +if: ; preds = %entry + + %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 0, i32 0) #0 + br label 
%endif + +else: ; preds = %entry + br label %endif + +endif: + %val = phi i32 [ %bpermute, %if ], [0, %else] ; preds = %else, %if + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + attributes #0 = { nounwind readnone convergent } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll index e217286..2f774ab 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll @@ -5,9 +5,33 @@ declare i32 @llvm.amdgcn.ds.permute(i32, i32) #0 ; FUNC-LABEL: {{^}}ds_permute: ; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define void @ds_permute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind { - %bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0 - store i32 %bpermute, i32 addrspace(1)* %out, align 4 + %permute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0 + store i32 %permute, i32 addrspace(1)* %out, align 4 ret void } +; FUNC-LABEL: {{^}}permute_no_waitcnt_test: +; CHECK: s_cbranch_scc1 +; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; CHECK-NOT: s_waitcnt +define void @permute_no_waitcnt_test(i32 addrspace(1)* %out, i32 %cond) { +entry: + + %tmp = icmp eq i32 %cond, 0 + br i1 %tmp, label %if, label %else + +if: ; preds = %entry + %permute = call i32 @llvm.amdgcn.ds.permute(i32 0, i32 0) #0 + br label %endif + +else: ; preds = %entry + br label %endif + +endif: + %val = phi i32 [ %permute, %if ], [0, %else] ; preds = %else, %if + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + + attributes #0 = { nounwind readnone convergent } -- 2.7.4