From: Matt Arsenault Date: Tue, 15 Nov 2016 20:22:55 +0000 (+0000) Subject: AMDGPU: Enable store clustering X-Git-Tag: llvmorg-4.0.0-rc1~4496 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d4bb5e483118cfa2634a21689afa217134d98eab;p=platform%2Fupstream%2Fllvm.git AMDGPU: Enable store clustering Also respect the TII hook for these like the generic code does in case we want a flag later to disable this. llvm-svn: 287021 --- diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index 42c7b967f3e7..f88bb69c3a74 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -36,6 +36,10 @@ bool AMDGPUInstrInfo::enableClusterLoads() const { return true; } +bool AMDGPUInstrInfo::enableClusterStores() const { + return true; +} + // FIXME: This behaves strangely. If, for example, you have 32 load + stores, // the first 16 loads will be interleaved with the stores, and the next 16 will // be clustered as expected. It should really split into 2 16 store batches. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h index de834f453a67..46e985dc8fde 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -40,6 +40,7 @@ public: explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st); bool enableClusterLoads() const override; + bool enableClusterStores() const override; bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 877dacd06f7b..baf4d192c57f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -102,7 +102,14 @@ static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique(C)); - DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + + const SIInstrInfo *TII = static_cast(DAG->TII); + if (TII->enableClusterLoads()) + DAG->addMutation(createLoadClusterDAGMutation(TII, DAG->TRI)); + + if (TII->enableClusterStores()) + DAG->addMutation(createStoreClusterDAGMutation(TII, DAG->TRI)); + return DAG; } diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll index d86f67a1334e..4beefb047f22 100644 --- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -207,13 +207,21 @@ define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrsp } ; FUNC-LABEL: {{^}}reorder_global_offsets_addr64_soffset0: -; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64{{$}} -; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:20{{$}} ; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:12{{$}} -; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:28{{$}} -; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:44{{$}} +; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:28{{$}} +; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:44{{$}} + +; GCN: v_mov_b32 +; GCN: v_mov_b32 + +; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64{{$}} +; GCN-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:20{{$}} + +; GCN: v_add_i32 +; GCN: v_add_i32 + ; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:36{{$}} -; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:52{{$}} +; GCN-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:52{{$}} define void @reorder_global_offsets_addr64_soffset0(i32 addrspace(1)* noalias nocapture %ptr.base) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() %id.ext = sext i32 %id to i64