From e8ade89bb3803fe07534b20db6b35bbc7ffc42e3 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <Valery.Pykhtin@amd.com>
Date: Fri, 6 Sep 2019 15:33:53 +0000
Subject: [PATCH] [AMDGPU] Enable constant offset promotion to immediate
 operand for VMEM stores

Differential revision: https://reviews.llvm.org/D66958

llvm-svn: 371214
---
 llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp    |  9 ++++----
 .../CodeGen/AMDGPU/promote-constOffset-to-imm.mir  | 24 ++++++++++++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 70fb377..49f3481 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1316,13 +1316,14 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
     MemInfoMap &Visited,
     SmallPtrSet<MachineInstr *, 4> &AnchorList) {
 
+  if (!(MI.mayLoad() ^ MI.mayStore()))
+    return false;
+
   // TODO: Support flat and scratch.
-  if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0 ||
-      TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL)
+  if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
     return false;
 
-  // TODO: Support Store.
-  if (!MI.mayLoad())
+  if (MI.mayLoad() && TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL)
     return false;
 
   if (AnchorList.count(&MI))
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
index 5c92afc..cf95723 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir
@@ -188,3 +188,27 @@ body: |
     %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
     %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
 ...
+---
+
+# GFX9-LABEL: name: diffoporder_add_store
+# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0, 0, 0
+# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0, 0, 0
+
+name: diffoporder_add_store
+body: |
+  bb.0.entry:
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+
+    %1:sgpr_32 = S_MOV_B32 4000
+    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %0.sub0, %1, 0, implicit $exec
+    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+    GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, 0, 0, implicit $exec
+
+    %8:sgpr_32 = S_MOV_B32 3000
+    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_I32_e64 %0.sub0, %8, 0, implicit $exec
+    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+    GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, 0, 0, implicit $exec
+...
-- 
2.7.4