From 86b0eb81837f4912d3f10bcfad35086cc54a1841 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Fri, 22 Nov 2019 16:43:46 +0000 Subject: [PATCH] Limit LDS usage. 2019-11-22 Andrew Stubbs gcc/ * config/gcn/gcn.c (OMP_LDS_SIZE): Define. (ACC_LDS_SIZE): Define. (OTHER_LDS_SIZE): Define. (LDS_SIZE): Redefine using above. (gcn_expand_prologue): Initialize m0 with LDS_SIZE-1. From-SVN: r278622 --- gcc/ChangeLog | 8 ++++++++ gcc/config/gcn/gcn.c | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9fbe728..443d64d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2019-11-22 Andrew Stubbs + + * config/gcn/gcn.c (OMP_LDS_SIZE): Define. + (ACC_LDS_SIZE): Define. + (OTHER_LDS_SIZE): Define. + (LDS_SIZE): Redefine using above. + (gcn_expand_prologue): Initialize m0 with LDS_SIZE-1. + 2019-11-22 Martin Sebor PR tree-optimization/92501 diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c index 3a8c10e..f85d84b 100644 --- a/gcc/config/gcn/gcn.c +++ b/gcc/config/gcn/gcn.c @@ -70,10 +70,15 @@ int gcn_isa = 3; /* Default to GCN3. */ worker-single mode to worker-partitioned mode), per workgroup. Global analysis could calculate an exact bound, but we don't do that yet. - We reserve the whole LDS, which also prevents any other workgroup - sharing the Compute Unit. */ + We want to permit full occupancy, so size accordingly. */ -#define LDS_SIZE 65536 +#define OMP_LDS_SIZE 0x600 /* 0x600 is 1/40 total, rounded down. */ +#define ACC_LDS_SIZE 32768 /* Half of the total should be fine. */ +#define OTHER_LDS_SIZE 65536 /* If in doubt, reserve all of it. */ + +#define LDS_SIZE (flag_openacc ? ACC_LDS_SIZE \ + : flag_openmp ? OMP_LDS_SIZE \ + : OTHER_LDS_SIZE) /* The number of registers usable by normal non-kernel functions. The SGPR count includes any special extra registers such as VCC. */ @@ -2876,8 +2881,11 @@ gcn_expand_prologue () /* Ensure that the scheduler doesn't do anything unexpected. */ emit_insn (gen_blockage ()); + /* m0 is initialized for the usual LDS DS and FLAT memory case. + The low-part is the address of the topmost addressable byte, which is + size-1. The high-part is an offset and should be zero. */ emit_move_insn (gen_rtx_REG (SImode, M0_REG), - gen_int_mode (LDS_SIZE, SImode)); + gen_int_mode (LDS_SIZE-1, SImode)); emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG))); -- 2.7.4