From: Konstantin Zhuravlyov Date: Wed, 25 Oct 2017 17:04:46 +0000 (+0000) Subject: AMDGPU: Cleanup memory legalizer load/store tests X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cff1155035f50d94a04255a428b63fee5bad6200;p=platform%2Fupstream%2Fllvm.git AMDGPU: Cleanup memory legalizer load/store tests llvm-svn: 316590 --- diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll index 0434978..57e705f 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll @@ -1,12 +1,16 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s -; CHECK-LABEL: {{^}}system_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +declare i32 @llvm.amdgcn.workitem.id.x() + +; GCN-LABEL: {{^}}system_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @system_unordered( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -15,12 +19,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}system_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}system_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @system_monotonic( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -29,12 +33,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}system_acquire -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} -; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}system_acquire +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; GCN-NEXT: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @system_acquire( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -43,12 +47,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}system_seq_cst -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} -; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}system_seq_cst +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; GCN-NEXT: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @system_seq_cst( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -57,12 +61,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}singlethread_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}singlethread_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @singlethread_unordered( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -71,12 +75,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}singlethread_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}singlethread_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @singlethread_monotonic( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -85,12 +89,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}singlethread_acquire -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}singlethread_acquire +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @singlethread_acquire( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -99,12 +103,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}singlethread_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}singlethread_seq_cst +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @singlethread_seq_cst( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -113,12 +117,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}agent_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}agent_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @agent_unordered( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -127,12 +131,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}agent_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}agent_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @agent_monotonic( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -141,12 +145,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}agent_acquire -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} -; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}agent_acquire +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; GCN-NEXT: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @agent_acquire( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -155,12 +159,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}agent_seq_cst -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} -; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}agent_seq_cst +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; GCN-NEXT: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @agent_seq_cst( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -169,12 +173,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}workgroup_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}workgroup_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @workgroup_unordered( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -183,12 +187,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}workgroup_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}workgroup_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @workgroup_monotonic( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -197,12 +201,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}workgroup_acquire -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}workgroup_acquire +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @workgroup_acquire( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -211,12 +215,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}workgroup_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}workgroup_seq_cst +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @workgroup_seq_cst( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -225,12 +229,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}wavefront_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}wavefront_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @wavefront_unordered( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -239,12 +243,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}wavefront_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}wavefront_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @wavefront_monotonic( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -253,12 +257,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}wavefront_acquire -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}wavefront_acquire +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @wavefront_acquire( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -267,12 +271,12 @@ entry: ret void } -; CHECK-LABEL: {{^}}wavefront_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +; GCN-LABEL: {{^}}wavefront_seq_cst +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @wavefront_seq_cst( i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -280,3 +284,94 @@ entry: store i32 %val, i32 addrspace(4)* %out ret void } + +; GCN-LABEL: {{^}}nontemporal_private_0 +; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} +define amdgpu_kernel void @nontemporal_private_0( + i32* %in, i32 addrspace(4)* %out) { +entry: + %val = load i32, i32* %in, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_private_1 +; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} +define amdgpu_kernel void @nontemporal_private_1( + i32* %in, i32 addrspace(4)* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid + %val = load i32, i32* %val.gep, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_global_0 +; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}} +define amdgpu_kernel void @nontemporal_global_0( + i32 addrspace(1)* %in, i32 addrspace(4)* %out) { +entry: + %val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_global_1 +; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} +; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off glc slc{{$}} +define amdgpu_kernel void @nontemporal_global_1( + i32 addrspace(1)* %in, i32 addrspace(4)* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid + %val = load i32, i32 addrspace(1)* %val.gep, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_local_0 +; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} +define amdgpu_kernel void @nontemporal_local_0( + i32 addrspace(3)* %in, i32 addrspace(4)* %out) { +entry: + %val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_local_1 +; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} +define amdgpu_kernel void @nontemporal_local_1( + i32 addrspace(3)* %in, i32 addrspace(4)* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid + %val = load i32, i32 addrspace(3)* %val.gep, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_flat_0 +; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} +define amdgpu_kernel void @nontemporal_flat_0( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load i32, i32 addrspace(4)* %in, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_flat_1 +; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} +define amdgpu_kernel void @nontemporal_flat_1( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val.gep = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tid + %val = load i32, i32 addrspace(4)* %val.gep, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +!0 = !{i32 1} diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-load.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-load.ll deleted file mode 100644 index 2157ca3..0000000 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-load.ll +++ /dev/null @@ -1,97 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx800 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s - -declare i32 @llvm.amdgcn.workitem.id.x() - -; GCN-LABEL: {{^}}nontemporal_load_private_0 -; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} -define amdgpu_kernel void @nontemporal_load_private_0( - i32* %in, i32 addrspace(4)* %out) { -entry: - %val = load i32, i32* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_private_1 -; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} -define amdgpu_kernel void @nontemporal_load_private_1( - i32* %in, i32 addrspace(4)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid - %val = load i32, i32* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_global_0 -; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}} -define amdgpu_kernel void @nontemporal_load_global_0( - i32 addrspace(1)* %in, i32 addrspace(4)* %out) { -entry: - %val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_global_1 -; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} -; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off glc slc{{$}} -define amdgpu_kernel void @nontemporal_load_global_1( - i32 addrspace(1)* %in, i32 addrspace(4)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_local_0 -; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} -define amdgpu_kernel void @nontemporal_load_local_0( - i32 addrspace(3)* %in, i32 addrspace(4)* %out) { -entry: - %val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_local_1 -; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} -define amdgpu_kernel void @nontemporal_load_local_1( - i32 addrspace(3)* %in, i32 addrspace(4)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid - %val = load i32, i32 addrspace(3)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_flat_0 -; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} -define amdgpu_kernel void @nontemporal_load_flat_0( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load i32, i32 addrspace(4)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_flat_1 -; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} -define amdgpu_kernel void @nontemporal_load_flat_1( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tid - %val = load i32, i32 addrspace(4)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -!0 = !{i32 1} diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-store.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-store.ll deleted file mode 100644 index 7860479..0000000 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-store.ll +++ /dev/null @@ -1,97 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx800 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s - -declare i32 @llvm.amdgcn.workitem.id.x() - -; GCN-LABEL: {{^}}nontemporal_store_private_0 -; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_private_0( - i32 addrspace(4)* %in, i32* %out) { -entry: - %val = load i32, i32 addrspace(4)* %in, align 4 - store i32 %val, i32* %out, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_private_1 -; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_private_1( - i32 addrspace(4)* %in, i32* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid - store i32 %val, i32* %out.gep, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_global_0 -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_global_0( - i32 addrspace(4)* %in, i32 addrspace(1)* %out) { -entry: - %val = load i32, i32 addrspace(4)* %in, align 4 - store i32 %val, i32 addrspace(1)* %out, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_global_1 -; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} -; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_global_1( - i32 addrspace(4)* %in, i32 addrspace(1)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid - store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_local_0 -; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} -define amdgpu_kernel void @nontemporal_store_local_0( - i32 addrspace(4)* %in, i32 addrspace(3)* %out) { -entry: - %val = load i32, i32 addrspace(4)* %in, align 4 - store i32 %val, i32 addrspace(3)* %out, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_local_1 -; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} -define amdgpu_kernel void @nontemporal_store_local_1( - i32 addrspace(4)* %in, i32 addrspace(3)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid - store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_flat_0 -; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_flat_0( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load i32, i32 addrspace(4)* %in, align 4 - store i32 %val, i32 addrspace(4)* %out, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_flat_1 -; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_flat_1( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(4)* %out, i32 %tid - store i32 %val, i32 addrspace(4)* %out.gep, !nontemporal !0 - ret void -} - -!0 = !{i32 1} diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll index 14b7d3e..50df0bc 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll @@ -1,9 +1,13 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s -; CHECK-LABEL: {{^}}system_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +declare i32 @llvm.amdgcn.workitem.id.x() + +; GCN-LABEL: {{^}}system_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @system_unordered( i32 %in, i32 addrspace(4)* %out) { entry: @@ -11,9 +15,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}system_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}system_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @system_monotonic( i32 %in, i32 addrspace(4)* %out) { entry: @@ -21,9 +25,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}system_release -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}system_release +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @system_release( i32 %in, i32 addrspace(4)* %out) { entry: @@ -31,9 +35,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}system_seq_cst -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}system_seq_cst +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @system_seq_cst( i32 %in, i32 addrspace(4)* %out) { entry: @@ -41,9 +45,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}singlethread_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}singlethread_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @singlethread_unordered( i32 %in, i32 addrspace(4)* %out) { entry: @@ -51,9 +55,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}singlethread_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}singlethread_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @singlethread_monotonic( i32 %in, i32 addrspace(4)* %out) { entry: @@ -61,9 +65,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}singlethread_release -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}singlethread_release +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @singlethread_release( i32 %in, i32 addrspace(4)* %out) { entry: @@ -71,9 +75,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}singlethread_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}singlethread_seq_cst +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @singlethread_seq_cst( i32 %in, i32 addrspace(4)* %out) { entry: @@ -81,9 +85,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}agent_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}agent_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @agent_unordered( i32 %in, i32 addrspace(4)* %out) { entry: @@ -91,9 +95,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}agent_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}agent_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @agent_monotonic( i32 %in, i32 addrspace(4)* %out) { entry: @@ -101,9 +105,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}agent_release -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}agent_release +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @agent_release( i32 %in, i32 addrspace(4)* %out) { entry: @@ -111,9 +115,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}agent_seq_cst -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}agent_seq_cst +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @agent_seq_cst( i32 %in, i32 addrspace(4)* %out) { entry: @@ -121,9 +125,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}workgroup_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}workgroup_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @workgroup_unordered( i32 %in, i32 addrspace(4)* %out) { entry: @@ -131,9 +135,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}workgroup_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}workgroup_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @workgroup_monotonic( i32 %in, i32 addrspace(4)* %out) { entry: @@ -141,9 +145,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}workgroup_release -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}workgroup_release +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @workgroup_release( i32 %in, i32 addrspace(4)* %out) { entry: @@ -151,9 +155,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}workgroup_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}workgroup_seq_cst +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @workgroup_seq_cst( i32 %in, i32 addrspace(4)* %out) { entry: @@ -161,9 +165,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}wavefront_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}wavefront_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @wavefront_unordered( i32 %in, i32 addrspace(4)* %out) { entry: @@ -171,9 +175,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}wavefront_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}wavefront_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @wavefront_monotonic( i32 %in, i32 addrspace(4)* %out) { entry: @@ -181,9 +185,9 @@ entry: ret void } -; CHECK-LABEL: {{^}}wavefront_release -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}wavefront_release +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @wavefront_release( i32 %in, i32 addrspace(4)* %out) { entry: @@ -191,12 +195,104 @@ entry: ret void } -; CHECK-LABEL: {{^}}wavefront_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +; GCN-LABEL: {{^}}wavefront_seq_cst +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @wavefront_seq_cst( i32 %in, i32 addrspace(4)* %out) { entry: store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") seq_cst, align 4 ret void } + +; GCN-LABEL: {{^}}nontemporal_private_0 +; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} +define amdgpu_kernel void @nontemporal_private_0( + i32 addrspace(4)* %in, i32* %out) { +entry: + %val = load i32, i32 addrspace(4)* %in, align 4 + store i32 %val, i32* %out, !nontemporal !0 + ret void +} + +; GCN-LABEL: {{^}}nontemporal_private_1 +; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} +define amdgpu_kernel void @nontemporal_private_1( + i32 addrspace(4)* %in, i32* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val = load i32, i32 addrspace(4)* %in, align 4 + %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid + store i32 %val, i32* %out.gep, !nontemporal !0 + ret void +} + +; GCN-LABEL: {{^}}nontemporal_global_0 +; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} +; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}} +define amdgpu_kernel void @nontemporal_global_0( + i32 addrspace(4)* %in, i32 addrspace(1)* %out) { +entry: + %val = load i32, i32 addrspace(4)* %in, align 4 + store i32 %val, i32 addrspace(1)* %out, !nontemporal !0 + ret void +} + +; GCN-LABEL: {{^}}nontemporal_global_1 +; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} +; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}} +define amdgpu_kernel void @nontemporal_global_1( + i32 addrspace(4)* %in, i32 addrspace(1)* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val = load i32, i32 addrspace(4)* %in, align 4 + %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid + store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0 + ret void +} + +; GCN-LABEL: {{^}}nontemporal_local_0 +; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} +define amdgpu_kernel void @nontemporal_local_0( + i32 addrspace(4)* %in, i32 addrspace(3)* %out) { +entry: + %val = load i32, i32 addrspace(4)* %in, align 4 + store i32 %val, i32 addrspace(3)* %out, !nontemporal !0 + ret void +} + +; GCN-LABEL: {{^}}nontemporal_local_1 +; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} +define amdgpu_kernel void @nontemporal_local_1( + i32 addrspace(4)* %in, i32 addrspace(3)* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val = load i32, i32 addrspace(4)* %in, align 4 + %out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid + store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0 + ret void +} + +; GCN-LABEL: {{^}}nontemporal_flat_0 +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} +define amdgpu_kernel void @nontemporal_flat_0( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load i32, i32 addrspace(4)* %in, align 4 + store i32 %val, i32 addrspace(4)* %out, !nontemporal !0 + ret void +} + +; GCN-LABEL: {{^}}nontemporal_flat_1 +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} +define amdgpu_kernel void @nontemporal_flat_1( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val = load i32, i32 addrspace(4)* %in, align 4 + %out.gep = getelementptr inbounds i32, i32 addrspace(4)* %out, i32 %tid + store i32 %val, i32 addrspace(4)* %out.gep, !nontemporal !0 + ret void +} + +!0 = !{i32 1}