From 342f94647678b60357e7f6e988874a4339fb5ea8 Mon Sep 17 00:00:00 2001 From: Kwok Cheung Yeung Date: Fri, 15 Nov 2019 15:36:34 +0000 Subject: [PATCH] [amdgcn] Restrict registers available to non-kernel functions Restrict the number of SGPRs and VGPRs available to non-kernel functions to improve compute-unit occupancy with multiple threads. 2019-11-15 Kwok Cheung Yeung gcc/ * config/gcn/gcn.c (default_requested_args): New. (gcn_parse_amdgpu_hsa_kernel_attribute): Initialize requested args set with default_requested_args. (gcn_conditional_register_usage): Limit register usage of non-kernel functions. Reassign fixed registers if a non-standard set of args is requested. * config/gcn/gcn.h (FIXED_REGISTERS): Fix registers according to ABI. From-SVN: r278301 --- gcc/ChangeLog | 10 +++++++++ gcc/config/gcn/gcn.c | 63 ++++++++++++++++++++++++++++++---------------------- gcc/config/gcn/gcn.h | 6 ++--- 3 files changed, 49 insertions(+), 30 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index fe8daf4..4de97a0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2019-11-15 Kwok Cheung Yeung + + * config/gcn/gcn.c (default_requested_args): New. + (gcn_parse_amdgpu_hsa_kernel_attribute): Initialize requested args + set with default_requested_args. + (gcn_conditional_register_usage): Limit register usage of non-kernel + functions. Reassign fixed registers if a non-standard set of args is + requested. + * config/gcn/gcn.h (FIXED_REGISTERS): Fix registers according to ABI. + 2019-11-15 Feng Xue PR ipa/92528 diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c index d2ec6e1..383e0aa 100644 --- a/gcc/config/gcn/gcn.c +++ b/gcc/config/gcn/gcn.c @@ -191,6 +191,17 @@ static const struct gcn_kernel_arg_type {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2} }; +static const long default_requested_args + = (1 << PRIVATE_SEGMENT_BUFFER_ARG) + | (1 << DISPATCH_PTR_ARG) + | (1 << QUEUE_PTR_ARG) + | (1 << KERNARG_SEGMENT_PTR_ARG) + | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG) + | (1 << WORKGROUP_ID_X_ARG) + | (1 << WORK_ITEM_ID_X_ARG) + | (1 << WORK_ITEM_ID_Y_ARG) + | (1 << WORK_ITEM_ID_Z_ARG); + /* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())). This function also sets the default values for some arguments. @@ -201,10 +212,7 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args, tree list) { bool err = false; - args->requested = ((1 << PRIVATE_SEGMENT_BUFFER_ARG) - | (1 << QUEUE_PTR_ARG) - | (1 << KERNARG_SEGMENT_PTR_ARG) - | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG)); + args->requested = default_requested_args; args->nargs = 0; for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++) @@ -242,8 +250,6 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args, args->requested |= (1 << a); args->order[args->nargs++] = a; } - args->requested |= (1 << WORKGROUP_ID_X_ARG); - args->requested |= (1 << WORK_ITEM_ID_Z_ARG); /* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and WORK_ITEM_ID_Y_ARG. Similarly, requesting WORK_ITEM_ID_Y_ARG implies @@ -253,10 +259,6 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args, if (args->requested & (1 << WORK_ITEM_ID_Y_ARG)) args->requested |= (1 << WORK_ITEM_ID_X_ARG); - /* Always enable this so that kernargs is in a predictable place for - gomp_print, etc. */ - args->requested |= (1 << DISPATCH_PTR_ARG); - int sgpr_regno = FIRST_SGPR_REG; args->nsgprs = 0; for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++) @@ -2045,27 +2047,34 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass, static void gcn_conditional_register_usage (void) { - int i; + if (!cfun || !cfun->machine) + return; - /* FIXME: Do we need to reset fixed_regs? */ + if (cfun->machine->normal_function) + { + /* Restrict the set of SGPRs and VGPRs used by non-kernel functions. */ + for (int i = SGPR_REGNO (62); i <= LAST_SGPR_REG; i++) + fixed_regs[i] = 1, call_used_regs[i] = 1; -/* Limit ourselves to 1/16 the register file for maximimum sized workgroups. - There are enough SGPRs not to limit those. - TODO: Adjust this more dynamically. */ - for (i = FIRST_VGPR_REG + 64; i <= LAST_VGPR_REG; i++) - fixed_regs[i] = 1, call_used_regs[i] = 1; + for (int i = VGPR_REGNO (24); i <= LAST_VGPR_REG; i++) + fixed_regs[i] = 1, call_used_regs[i] = 1; - if (!cfun || !cfun->machine || cfun->machine->normal_function) - { - /* Normal functions can't know what kernel argument registers are - live, so just fix the bottom 16 SGPRs, and bottom 3 VGPRs. */ - for (i = 0; i < 16; i++) - fixed_regs[FIRST_SGPR_REG + i] = 1; - for (i = 0; i < 3; i++) - fixed_regs[FIRST_VGPR_REG + i] = 1; return; } + /* If the set of requested args is the default set, nothing more needs to + be done. */ + if (cfun->machine->args.requested == default_requested_args) + return; + + /* Requesting a set of args different from the default violates the ABI. */ + if (!leaf_function_p ()) + warning (0, "A non-default set of initial values has been requested, " + "which violates the ABI!"); + + for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++) + fixed_regs[i] = 0; + /* Fix the runtime argument register containing values that may be needed later. DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be needed after the prologue so there's no need to fix them. */ @@ -2073,10 +2082,10 @@ gcn_conditional_register_usage (void) fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1; if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0) { + /* The upper 32-bits of the 64-bit descriptor are not used, so allow + the containing registers to be used for other purposes. */ fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1; fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1; - fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 2] = 1; - fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 3] = 1; } if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0) { diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h index b3b2d1a..dd3789b 100644 --- a/gcc/config/gcn/gcn.h +++ b/gcc/config/gcn/gcn.h @@ -160,9 +160,9 @@ #define FIXED_REGISTERS { \ /* Scalars. */ \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, \ /* fp sp lr. */ \ - 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, \ + 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, \ /* exec_save, cc_save */ \ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ @@ -180,7 +180,7 @@ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ /* VGRPs */ \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ -- 2.7.4