From 839b80aa896726d989b94ff583bebfa660638aeb Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 24 Jul 2019 08:31:46 -0700 Subject: [PATCH] pan/midgard: Allocate special register classes We'll want to also handle load/store and texture registers in our RA loop. Signed-off-by: Alyssa Rosenzweig --- src/panfrost/midgard/compiler.h | 9 ++++ src/panfrost/midgard/midgard_compile.h | 6 ++- src/panfrost/midgard/midgard_ra.c | 81 ++++++++++++++++++---------------- 3 files changed, 57 insertions(+), 39 deletions(-) diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 326e351..8266c54 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -447,6 +447,15 @@ void schedule_program(compiler_context *ctx); struct ra_graph; +/* Broad types of register classes so we can handle special + * registers */ + +#define NR_REG_CLASSES 3 + +#define REG_CLASS_WORK 0 +#define REG_CLASS_LDST 1 +#define REG_CLASS_TEX 2 + struct ra_graph* allocate_registers(compiler_context *ctx, bool *spilled); void install_registers(compiler_context *ctx, struct ra_graph *g); bool mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src); diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index 9f80648..d450971 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -41,8 +41,10 @@ struct midgard_screen { struct ra_regs *regs[9]; - /* Work register classes corresponds to the above register sets */ - unsigned reg_classes[9][4]; + /* Work register classes correspond to the above register + * sets. 
12 per set for 4 classes per work/ldst/tex */ + + unsigned reg_classes[9][12]; }; /* Define the general compiler entry point */ diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 01f0f49..05d8452 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -158,51 +158,58 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, int reg) } /* This routine creates a register set. Should be called infrequently since - * it's slow and can be cached */ + * it's slow and can be cached. For legibility, variables are named in terms of + * work registers, although it is also used to create the register set for + * special register allocation */ static struct ra_regs * create_register_set(unsigned work_count, unsigned *classes) { - int virtual_count = work_count * WORK_STRIDE; + int virtual_count = 32 * WORK_STRIDE; /* First, initialize the RA */ struct ra_regs *regs = ra_alloc_reg_set(NULL, virtual_count, true); - int work_vec4 = ra_alloc_reg_class(regs); - int work_vec3 = ra_alloc_reg_class(regs); - int work_vec2 = ra_alloc_reg_class(regs); - int work_vec1 = ra_alloc_reg_class(regs); - - classes[0] = work_vec1; - classes[1] = work_vec2; - classes[2] = work_vec3; - classes[3] = work_vec4; - - /* Add the full set of work registers */ - for (unsigned i = 0; i < work_count; ++i) { - int base = WORK_STRIDE * i; - - /* Build a full set of subdivisions */ - ra_class_add_reg(regs, work_vec4, base); - ra_class_add_reg(regs, work_vec3, base + 1); - ra_class_add_reg(regs, work_vec3, base + 2); - ra_class_add_reg(regs, work_vec2, base + 3); - ra_class_add_reg(regs, work_vec2, base + 4); - ra_class_add_reg(regs, work_vec2, base + 5); - ra_class_add_reg(regs, work_vec1, base + 6); - ra_class_add_reg(regs, work_vec1, base + 7); - ra_class_add_reg(regs, work_vec1, base + 8); - ra_class_add_reg(regs, work_vec1, base + 9); - - for (unsigned a = 0; a < 10; ++a) { - unsigned mask1 = reg_type_to_mask[a]; - - for (unsigned b = 
0; b < 10; ++b) { - unsigned mask2 = reg_type_to_mask[b]; - - if (mask1 & mask2) - ra_add_reg_conflict(regs, - base + a, base + b); + for (unsigned c = 0; c < NR_REG_CLASSES; ++c) { + int work_vec4 = ra_alloc_reg_class(regs); + int work_vec3 = ra_alloc_reg_class(regs); + int work_vec2 = ra_alloc_reg_class(regs); + int work_vec1 = ra_alloc_reg_class(regs); + + classes[4*c + 0] = work_vec1; + classes[4*c + 1] = work_vec2; + classes[4*c + 2] = work_vec3; + classes[4*c + 3] = work_vec4; + + /* Special register classes have two registers in them */ + unsigned count = (c == REG_CLASS_WORK) ? work_count : 2; + + /* Add the full set of work registers */ + for (unsigned i = 0; i < count; ++i) { + int base = WORK_STRIDE * i; + + /* Build a full set of subdivisions */ + ra_class_add_reg(regs, work_vec4, base); + ra_class_add_reg(regs, work_vec3, base + 1); + ra_class_add_reg(regs, work_vec3, base + 2); + ra_class_add_reg(regs, work_vec2, base + 3); + ra_class_add_reg(regs, work_vec2, base + 4); + ra_class_add_reg(regs, work_vec2, base + 5); + ra_class_add_reg(regs, work_vec1, base + 6); + ra_class_add_reg(regs, work_vec1, base + 7); + ra_class_add_reg(regs, work_vec1, base + 8); + ra_class_add_reg(regs, work_vec1, base + 9); + + for (unsigned a = 0; a < 10; ++a) { + unsigned mask1 = reg_type_to_mask[a]; + + for (unsigned b = 0; b < 10; ++b) { + unsigned mask2 = reg_type_to_mask[b]; + + if (mask1 & mask2) + ra_add_reg_conflict(regs, + base + a, base + b); + } } } } -- 2.7.4