From a548c75e31b4146d55133cb8c57a82117c196584 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kristian=20H=C3=B8gsberg=20Kristensen?= Date: Fri, 4 Sep 2015 17:09:40 -0700 Subject: [PATCH] i965: Move perf_debug code to brw_codegen_*_prog() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We're trying to avoid a libdrm dependency in the core compiler, so let's move the perf_debug code one level up from the brw_*_emit() helpers to the brw_codegen_*_prog() helpers. Reviewed-by: Kenneth Graunke Reviewed-by: Jordan Justen Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_cs.c | 31 ++++++++++++++++++++----- src/mesa/drivers/dri/i965/brw_fs.cpp | 41 ---------------------------------- src/mesa/drivers/dri/i965/brw_vec4.cpp | 19 ---------------- src/mesa/drivers/dri/i965/brw_vs.c | 29 +++++++++++++++++++----- src/mesa/drivers/dri/i965/brw_wm.c | 31 ++++++++++++++++++++----- 5 files changed, 75 insertions(+), 76 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index 012c466..cb3fae6 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -63,8 +63,11 @@ brw_codegen_cs_prog(struct brw_context *brw, void *mem_ctx = ralloc_context(NULL); GLuint program_size; struct brw_cs_prog_data prog_data; + bool start_busy = false; + double start_time = 0; - struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + struct brw_shader *cs = + (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE]; assert (cs); memset(&prog_data, 0, sizeof(prog_data)); @@ -73,8 +76,8 @@ brw_codegen_cs_prog(struct brw_context *brw, * prog_data associated with the compiled program, and which will be freed * by the state cache. */ - int param_count = cs->num_uniform_components + - cs->NumImages * BRW_IMAGE_PARAM_SIZE; + int param_count = cs->base.num_uniform_components + + cs->base.NumImages * BRW_IMAGE_PARAM_SIZE; /* The backend also sometimes adds params for texture size. */ param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; @@ -83,9 +86,15 @@ brw_codegen_cs_prog(struct brw_context *brw, prog_data.base.pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); prog_data.base.image_param = - rzalloc_array(NULL, struct brw_image_param, cs->NumImages); + rzalloc_array(NULL, struct brw_image_param, cs->base.NumImages); prog_data.base.nr_params = param_count; - prog_data.base.nr_image_params = cs->NumImages; + prog_data.base.nr_image_params = cs->base.NumImages; + + if (unlikely(brw->perf_debug)) { + start_busy = (brw->batch.last_bo && + drm_intel_bo_busy(brw->batch.last_bo)); + start_time = get_time(); + } program = brw_cs_emit(brw, mem_ctx, key, &prog_data, &cp->program, prog, &program_size); @@ -94,6 +103,18 @@ brw_codegen_cs_prog(struct brw_context *brw, return false; } + if (unlikely(brw->perf_debug) && cs) { + if (cs->compiled_once) { + _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles"); + } + cs->compiled_once = true; + + if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { + perf_debug("CS compile took %.03f ms and stalled the GPU\n", + (get_time() - start_time) * 1000); + } + } + if (prog_data.base.total_scratch) { brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo, prog_data.base.total_scratch * brw->max_cs_threads); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index d4900f1..b9f1051 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5140,15 +5140,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct gl_shader_program *prog, unsigned *final_assembly_size) { - bool start_busy = false; - double start_time = 0; - - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && - drm_intel_bo_busy(brw->batch.last_bo)); - start_time = get_time(); - } - struct brw_shader *shader = NULL; if (prog) shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; @@ -5226,17 +5217,6 @@ brw_wm_fs_emit(struct brw_context *brw, if (simd16_cfg) prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16); - if (unlikely(brw->perf_debug) && shader) { - if (shader->compiled_once) - brw_wm_debug_recompile(brw, prog, key); - shader->compiled_once = true; - - if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { - perf_debug("FS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - } - return g.get_assembly(final_assembly_size); } @@ -5286,15 +5266,6 @@ brw_cs_emit(struct brw_context *brw, struct gl_shader_program *prog, unsigned *final_assembly_size) { - bool start_busy = false; - double start_time = 0; - - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && - drm_intel_bo_busy(brw->batch.last_bo)); - start_time = get_time(); - } - struct brw_shader *shader = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE]; @@ -5368,17 +5339,5 @@ brw_cs_emit(struct brw_context *brw, g.generate_code(cfg, prog_data->simd_size); - if (unlikely(brw->perf_debug) && shader) { - if (shader->compiled_once) { - _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles"); - } - shader->compiled_once = true; - - if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { - perf_debug("CS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - } - return g.get_assembly(final_assembly_size); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 181768b..ed49cd3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1930,16 +1930,8 @@ brw_vs_emit(struct brw_context *brw, struct gl_shader_program *prog, unsigned *final_assembly_size) { - bool start_busy = false; - double start_time = 0; const unsigned *assembly = NULL; - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && - drm_intel_bo_busy(brw->batch.last_bo)); - start_time = get_time(); - } - struct brw_shader *shader = NULL; if (prog) shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; @@ -2029,17 +2021,6 @@ brw_vs_emit(struct brw_context *brw, assembly = g.generate_assembly(v.cfg, final_assembly_size); } - if (unlikely(brw->perf_debug) && shader) { - if (shader->compiled_once) { - brw_vs_debug_recompile(brw, prog, key); - } - if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { - perf_debug("VS compile took %.03f ms and stalled the GPU\n", - (get_time() - start_time) * 1000); - } - shader->compiled_once = true; - } - return assembly; } diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 4e0d34f..465e78f 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -98,10 +98,12 @@ brw_codegen_vs_prog(struct brw_context *brw, struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base; void *mem_ctx; int i; - struct gl_shader *vs = NULL; + struct brw_shader *vs = NULL; + bool start_busy = false; + double start_time = 0; if (prog) - vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; + vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; memset(&prog_data, 0, sizeof(prog_data)); @@ -121,9 +123,9 @@ brw_codegen_vs_prog(struct brw_context *brw, * case being a float value that gets blown up to a vec4, so be * conservative here. */ - param_count = vs->num_uniform_components * 4 + - vs->NumImages * BRW_IMAGE_PARAM_SIZE; - stage_prog_data->nr_image_params = vs->NumImages; + param_count = vs->base.num_uniform_components * 4 + + vs->base.NumImages * BRW_IMAGE_PARAM_SIZE; + stage_prog_data->nr_image_params = vs->base.NumImages; } else { param_count = vp->program.Base.Parameters->NumParameters * 4; } @@ -185,6 +187,12 @@ brw_codegen_vs_prog(struct brw_context *brw, true); } + if (unlikely(brw->perf_debug)) { + start_busy = (brw->batch.last_bo && + drm_intel_bo_busy(brw->batch.last_bo)); + start_time = get_time(); + } + /* Emit GEN4 code. */ program = brw_vs_emit(brw, mem_ctx, key, &prog_data, @@ -194,6 +202,17 @@ brw_codegen_vs_prog(struct brw_context *brw, return false; } + if (unlikely(brw->perf_debug) && vs) { + if (vs->compiled_once) { + brw_vs_debug_recompile(brw, prog, key); + } + if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { + perf_debug("VS compile took %.03f ms and stalled the GPU\n", + (get_time() - start_time) * 1000); + } + vs->compiled_once = true; + } + /* Scratch space is used for register spilling */ if (prog_data.base.base.total_scratch) { brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo, diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 8dfa142..35c0908 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -165,11 +165,13 @@ brw_codegen_wm_prog(struct brw_context *brw, void *mem_ctx = ralloc_context(NULL); struct brw_wm_prog_data prog_data; const GLuint *program; - struct gl_shader *fs = NULL; + struct brw_shader *fs = NULL; GLuint program_size; + bool start_busy = false; + double start_time = 0; if (prog) - fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + fs = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; memset(&prog_data, 0, sizeof(prog_data)); /* key->alpha_test_func means simulating alpha testing via discards, @@ -180,7 +182,7 @@ brw_codegen_wm_prog(struct brw_context *brw, fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); prog_data.computed_depth_mode = computed_depth_mode(&fp->program); - prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests; + prog_data.early_fragment_tests = fs && fs->base.EarlyFragmentTests; /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ if (!prog) @@ -192,9 +194,9 @@ brw_codegen_wm_prog(struct brw_context *brw, */ int param_count; if (fs) { - param_count = fs->num_uniform_components + - fs->NumImages * BRW_IMAGE_PARAM_SIZE; - prog_data.base.nr_image_params = fs->NumImages; + param_count = fs->base.num_uniform_components + + fs->base.NumImages * BRW_IMAGE_PARAM_SIZE; + prog_data.base.nr_image_params = fs->base.NumImages; } else { param_count = fp->program.Base.Parameters->NumParameters * 4; } @@ -214,6 +216,12 @@ brw_codegen_wm_prog(struct brw_context *brw, key->persample_shading, &fp->program); + if (unlikely(brw->perf_debug)) { + start_busy = (brw->batch.last_bo && + drm_intel_bo_busy(brw->batch.last_bo)); + start_time = get_time(); + } + program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data, &fp->program, prog, &program_size); if (program == NULL) { @@ -221,6 +229,17 @@ brw_codegen_wm_prog(struct brw_context *brw, return false; } + if (unlikely(brw->perf_debug) && fs) { + if (fs->compiled_once) + brw_wm_debug_recompile(brw, prog, key); + fs->compiled_once = true; + + if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { + perf_debug("FS compile took %.03f ms and stalled the GPU\n", + (get_time() - start_time) * 1000); + } + } + if (prog_data.base.total_scratch) { brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo, prog_data.base.total_scratch * brw->max_wm_threads); -- 2.7.4