From 4735c8a522c3607058fd8c3162c7376790144b5b Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 15 Oct 2020 11:41:20 +0100
Subject: [PATCH] nir/loop_analyze: adjust force unrolling to only include interesting modes

Instead of force-unrolling any loop which reads an entire array, only do
it for arrays which might be faster to access with constant indices.

Significantly improves compile time for these CTS tests, which could
previously time out:
dEQP-VK.spirv_assembly.instruction.graphics.16bit_storage.struct_mixed_types.uniform_buffer_block_geom
dEQP-VK.spirv_assembly.instruction.graphics.16bit_storage.struct_mixed_types.uniform_geom
dEQP-VK.spirv_assembly.instruction.graphics.8bit_storage.struct_mixed_types.storage_buffer_geom
dEQP-VK.spirv_assembly.instruction.graphics.spirv_ids_abuse.lots_ids_geom

fossil-db (Navi):
Totals from 19 (0.01% of 137413) affected shaders:
SGPRs: 1728 -> 1688 (-2.31%)
VGPRs: 1176 -> 1168 (-0.68%)
CodeSize: 198496 -> 136580 (-31.19%)
MaxWaves: 154 -> 156 (+1.30%)
Instrs: 38889 -> 26029 (-33.07%)
Cycles: 446108 -> 1059924 (+137.59%); split: -0.91%, +138.51%
VMEM: 3245 -> 2926 (-9.83%)
SMEM: 850 -> 828 (-2.59%); split: +4.71%, -7.29%
VClause: 549 -> 533 (-2.91%)
SClause: 1810 -> 1522 (-15.91%)
Copies: 2209 -> 1705 (-22.82%); split: -22.95%, +0.14%
Branches: 854 -> 603 (-29.39%); split: -29.86%, +0.47%
PreSGPRs: 1512 -> 1506 (-0.40%); split: -0.53%, +0.13%

Signed-off-by: Rhys Perry
Reviewed-by: Timothy Arceri
Part-of:
---
 src/compiler/nir/nir_loop_analyze.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index fa930a7..d13dd70 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -1111,7 +1111,9 @@ force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref)
 {
    unsigned array_size = find_array_access_via_induction(state, deref, NULL);
    if (array_size) {
-      if (array_size == state->loop->info->max_trip_count)
+      if ((array_size == state->loop->info->max_trip_count) &&
+          (deref->mode & (nir_var_shader_in | nir_var_shader_out |
+                          nir_var_shader_temp | nir_var_function_temp)))
          return true;
 
       if (deref->mode & state->indirect_mask)
-- 
2.7.4