From 0e4a75f9171398261ab8bbdc974dafbcaac0161c Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 9 Sep 2019 22:21:17 -0700 Subject: [PATCH] intel/compiler: Record whether any pull constant loads occur I would like for iris to be able to avoid setting up SURFACE_STATE for UBOs in the common case where all constants are pushed. Unfortunately, we don't know up front whether everything will be pushed: the backend is allowed to demote pushed UBOs to pull loads fairly late in the process. This is probably desirable though, as we'd like the backend to be able to re-pull pushed data to break up long live ranges in response to register pressure. Here we simply add an "are there any pull loads at all" boolean to prog_data, which is a bit crude but at least allows us to skip work in the common "everything pushed" case. We could skip more work by tracking exactly which UBO surfaces are pulled in a bitmask, but I wanted to avoid bringing back the old mark_surface_used() mechanism. Finer-grained tracking could allow us to skip a bit more work when multiple UBOs are in use and /some/ are 100% pushed, but others are accessed via pulls. However, I'm not sure how common this is and it would save at most 4 pull descriptors, so we defer that for now. Reviewed-by: Caio Marcelo de Oliveira Filho --- src/intel/compiler/brw_compiler.h | 3 +++ src/intel/compiler/brw_fs.cpp | 4 ++++ src/intel/compiler/brw_fs_nir.cpp | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 340c8db..eb08444 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -654,6 +654,9 @@ struct brw_stage_prog_data { unsigned program_size; + /** Does this program pull from any UBO or other constant buffers? */ + bool has_ubo_pull; + /** * Register where the thread expects to find input data from the URB * (typically uniforms, followed by vertex or fragment attributes). 
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a677c8f..f1fe468 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2426,6 +2426,8 @@ fs_visitor::get_pull_locs(const fs_reg &src, *out_surf_index = prog_data->binding_table.ubo_start + range->block; *out_pull_index = (32 * range->start + src.offset) / 4; + + prog_data->has_ubo_pull = true; return true; } @@ -2435,6 +2437,8 @@ fs_visitor::get_pull_locs(const fs_reg &src, /* A regular uniform push constant */ *out_surf_index = stage_prog_data->binding_table.pull_constants_start; *out_pull_index = pull_constant_loc[location]; + + prog_data->has_ubo_pull = true; return true; } diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 210b710..043b4f1 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4323,6 +4323,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr for (int i = 0; i < instr->num_components; i++) VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, base_offset, i * type_sz(dest.type)); + + prog_data->has_ubo_pull = true; } else { /* Even if we are loading doubles, a pull constant load will load * a 32-bit vec4, so should only reserve vgrf space for that. If we @@ -4362,6 +4364,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } } + prog_data->has_ubo_pull = true; + const unsigned block_sz = 64; /* Fetch one cacheline at a time. */ const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0); const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_UD); -- 2.7.4