From 93df06087195fc0fbeb70f9366bb6d8b64821dcd Mon Sep 17 00:00:00 2001 From: Marek Olsak Date: Mon, 27 Jul 2015 18:16:08 +0000 Subject: [PATCH] AMDGPU: don't match vgpr loads for constant loads Author: Dave Airlie In order to implement indirect sampler loads, we don't want to match on a VGPR load but an SGPR one for constants, as we cannot feed VGPRs to the sampler, only SGPRs. This should be applicable for LLVM 3.7 as well. llvm-svn: 243294 --- llvm/lib/Target/AMDGPU/SIInstructions.td | 3 --- llvm/test/CodeGen/AMDGPU/gv-const-addrspace.ll | 12 +++--------- llvm/test/CodeGen/AMDGPU/smrd.ll | 8 +------- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index b2edc03..c296aeb 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2910,9 +2910,6 @@ defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; -defm : MUBUFLoad_Pattern ; -defm : MUBUFLoad_Pattern ; -defm : MUBUFLoad_Pattern ; } // End Predicates = [isSICI] class MUBUFScratchLoadPat : Pat < diff --git a/llvm/test/CodeGen/AMDGPU/gv-const-addrspace.ll b/llvm/test/CodeGen/AMDGPU/gv-const-addrspace.ll index 3c1fc6c..d4d1312 100644 --- a/llvm/test/CodeGen/AMDGPU/gv-const-addrspace.ll +++ b/llvm/test/CodeGen/AMDGPU/gv-const-addrspace.ll @@ -8,9 +8,7 @@ @float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4 ; FUNC-LABEL: {{^}}float: -; FIXME: We should be using s_load_dword here. -; SI: buffer_load_dword -; VI: s_load_dword +; GCN: s_load_dword ; EG-DAG: MOV {{\** *}}T2.X ; EG-DAG: MOV {{\** *}}T3.X @@ -31,9 +29,7 @@ entry: ; FUNC-LABEL: {{^}}i32: -; FIXME: We should be using s_load_dword here. 
-; SI: buffer_load_dword -; VI: s_load_dword +; GCN: s_load_dword ; EG-DAG: MOV {{\** *}}T2.X ; EG-DAG: MOV {{\** *}}T3.X @@ -71,9 +67,7 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { <1 x i32> ] ; FUNC-LABEL: {{^}}array_v1_gv_load: -; FIXME: We should be using s_load_dword here. -; SI: buffer_load_dword -; VI: s_load_dword +; GCN: s_load_dword define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index %load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll index b0c18ca..0598208 100644 --- a/llvm/test/CodeGen/AMDGPU/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd.ll @@ -43,13 +43,7 @@ entry: ; GCN-LABEL: {{^}}smrd3: ; FIXME: There are too many copies here because we don't fold immediates ; through REG_SEQUENCE -; SI: s_mov_b32 s[[SLO:[0-9]+]], 0 ; -; SI: s_mov_b32 s[[SHI:[0-9]+]], 4 -; SI: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]] -; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]] -; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] -; FIXME: We should be able to use s_load_dword here -; SI: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 +; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b ; TODO: Add VI checks ; GCN: s_endpgm define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { -- 2.7.4