From 840b806d641bbb6dabb3d456053bca5461f1d7ae Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 23 Jul 2019 07:59:00 -0700 Subject: [PATCH] panfrost/midgard: Allocate registers once (per-screen) This should save a lot of per-compile time by using the RA the way it's actually supposed to be used. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/pan_assemble.c | 2 +- src/gallium/drivers/panfrost/pan_blend_shaders.c | 2 +- src/gallium/drivers/panfrost/pan_context.h | 3 + src/panfrost/midgard/compiler.h | 3 + src/panfrost/midgard/midgard_compile.c | 3 +- src/panfrost/midgard/midgard_compile.h | 20 ++++++- src/panfrost/midgard/midgard_ra.c | 72 +++++++++++++++++++----- 7 files changed, 86 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index 1bf32f1..5e6f944 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -67,7 +67,7 @@ panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *m .alpha_ref = state->alpha_state.ref_value }; - midgard_compile_shader_nir(s, &program, false); + midgard_compile_shader_nir(&ctx->compiler, s, &program, false); /* Prepare the compiled binary for upload */ int size = program.compiled.size; diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c index a58808b..0063290 100644 --- a/src/gallium/drivers/panfrost/pan_blend_shaders.c +++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c @@ -170,7 +170,7 @@ panfrost_compile_blend_shader( /* Compile the built shader */ midgard_program program; - midgard_compile_shader_nir(shader, &program, true); + midgard_compile_shader_nir(&ctx->compiler, shader, &program, true); /* Upload the shader */ diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 097de15..5584003 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -91,6 +91,9 @@ struct panfrost_context { /* Gallium context */ struct pipe_context base; + /* Compiler context */ + struct midgard_screen compiler; + /* Bound job and map of panfrost_job_key to jobs */ struct panfrost_job *job; struct hash_table *jobs; diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 186ee7d..a3e174c 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -188,6 +188,9 @@ typedef struct compiler_context { nir_shader *nir; gl_shader_stage stage; + /* The screen we correspond to */ + struct midgard_screen *screen; + /* Is internally a blend shader? Depends on stage == FRAGMENT */ bool is_blend; diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 7d6bd31..5e5356f 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -2395,7 +2395,7 @@ midgard_get_first_tag_from_block(compiler_context *ctx, unsigned block_idx) } int -midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend) +midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midgard_program *program, bool is_blend) { struct util_dynarray *compiled = &program->compiled; @@ -2403,6 +2403,7 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl compiler_context ictx = { .nir = nir, + .screen = screen, .stage = nir->info.stage, .is_blend = is_blend, diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index c5ce9b7..2c86ccb 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -26,6 +26,24 @@ #include "compiler/nir/nir.h" #include "util/u_dynarray.h" +#include "util/register_allocate.h" + +/* To be shoved inside panfrost_screen for the Gallium driver, or somewhere + * else for Vulkan/standalone. The single compiler "screen" to be shared across + * all shader compiles, used to store complex initialization (for instance, + * related to register allocation) */ + +struct midgard_screen { + /* Precomputed register allocation sets for varying numbers of work + * registers. The zeroeth entry corresponds to 8 work registers. The + * eighth entry corresponds to 16 work registers. NULL if this set has + * not been allocated yet. */ + + struct ra_regs *regs[9]; + + /* Work register classes corresponds to the above register sets */ + unsigned reg_classes[9][4]; +}; /* Define the general compiler entry point */ @@ -92,7 +110,7 @@ typedef struct { } midgard_program; int -midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend); +midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midgard_program *program, bool is_blend); /* NIR options are shared between the standalone compiler and the online * compiler. Defining it here is the simplest, though maybe not the Right diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index fdd222b..01f0f49 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -157,17 +157,12 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, int reg) return r; } -/* This routine performs the actual register allocation. It should be succeeded - * by install_registers */ +/* This routine creates a register set. Should be called infrequently since + * it's slow and can be cached */ -struct ra_graph * -allocate_registers(compiler_context *ctx, bool *spilled) +static struct ra_regs * +create_register_set(unsigned work_count, unsigned *classes) { - /* The number of vec4 work registers available depends on when the - * uniforms start, so compute that first */ - - int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0); - int virtual_count = work_count * WORK_STRIDE; /* First, initialize the RA */ @@ -178,12 +173,10 @@ allocate_registers(compiler_context *ctx, bool *spilled) int work_vec2 = ra_alloc_reg_class(regs); int work_vec1 = ra_alloc_reg_class(regs); - unsigned classes[4] = { - work_vec1, - work_vec2, - work_vec3, - work_vec4 - }; + classes[0] = work_vec1; + classes[1] = work_vec2; + classes[2] = work_vec3; + classes[3] = work_vec4; /* Add the full set of work registers */ for (unsigned i = 0; i < work_count; ++i) { @@ -217,6 +210,55 @@ allocate_registers(compiler_context *ctx, bool *spilled) /* We're done setting up */ ra_set_finalize(regs, NULL); + return regs; +} + +/* This routine gets a precomputed register set off the screen if it's able, or otherwise it computes one on the fly */ + +static struct ra_regs * +get_register_set(struct midgard_screen *screen, unsigned work_count, unsigned **classes) +{ + /* Bounds check */ + assert(work_count >= 8); + assert(work_count <= 16); + + /* Compute index */ + unsigned index = work_count - 8; + + /* Find the reg set */ + struct ra_regs *cached = screen->regs[index]; + + if (cached) { + assert(screen->reg_classes[index]); + *classes = screen->reg_classes[index]; + return cached; + } + + /* Otherwise, create one */ + struct ra_regs *created = create_register_set(work_count, screen->reg_classes[index]); + + /* Cache it and use it */ + screen->regs[index] = created; + + *classes = screen->reg_classes[index]; + return created; +} + +/* This routine performs the actual register allocation. It should be succeeded + * by install_registers */ + +struct ra_graph * +allocate_registers(compiler_context *ctx, bool *spilled) +{ + /* The number of vec4 work registers available depends on when the + * uniforms start, so compute that first */ + int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0); + unsigned *classes = NULL; + struct ra_regs *regs = get_register_set(ctx->screen, work_count, &classes); + + assert(regs != NULL); + assert(classes != NULL); + /* No register allocation to do with no SSA */ if (!ctx->temp_count) -- 2.7.4