From f01bdb0484dd5224b183526d020ee3f2888cac45 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 15 Aug 2015 09:58:32 -0700 Subject: [PATCH] util/ra: Make allocating conflict lists optional Since i965 is now using make_reg_conflicts_transitive and doesn't need q-value computations, they are disabled on i965. They are enabled everywhere else so that they get the old behavior. This reduces the time spent in eglInitialize() on BDW by around 10-15%. Reviewed-by: Eric Anholt --- src/gallium/drivers/freedreno/ir3/ir3_ra.c | 2 +- .../drivers/r300/compiler/radeon_pair_regalloc.c | 3 ++- src/gallium/drivers/vc4/vc4_register_allocate.c | 2 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2 +- .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 2 +- src/util/register_allocate.c | 26 ++++++++++++++-------- src/util/register_allocate.h | 3 ++- 7 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index eaf3b3c..8801839 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -189,7 +189,7 @@ ir3_ra_alloc_reg_set(void *memctx) } /* allocate the reg-set.. */ - set->regs = ra_alloc_reg_set(set, ra_reg_count); + set->regs = ra_alloc_reg_set(set, ra_reg_count, true); set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count); set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count); diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c index 14f93fb..e8f4087 100644 --- a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c +++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c @@ -693,7 +693,8 @@ void rc_init_regalloc_state(struct rc_regalloc_state *s) }; /* Allocate the main ra data structure */ - s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW); + s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW, + true); /* Create the register classes */ for (i = 0; i < RC_REG_CLASS_COUNT; i++) { diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index a29db1f..b83500c 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -113,7 +113,7 @@ vc4_alloc_reg_set(struct vc4_context *vc4) if (vc4->regs) return; - vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs)); + vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), true); vc4->reg_class_any = ra_alloc_reg_class(vc4->regs); vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 1d39858..6eb9889 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -156,7 +156,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) } uint8_t *ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count); - struct ra_regs *regs = ra_alloc_reg_set(compiler, ra_reg_count); + struct ra_regs *regs = ra_alloc_reg_set(compiler, ra_reg_count, false); if (devinfo->gen >= 6) ra_set_allocate_round_robin(regs); int *classes = ralloc_array(compiler, int, class_count); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index d7b6ad5..62ed708 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -115,7 +115,7 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler) ralloc_free(compiler->vec4_reg_set.ra_reg_to_grf); compiler->vec4_reg_set.ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count); ralloc_free(compiler->vec4_reg_set.regs); - compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, ra_reg_count); + compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, ra_reg_count, false); if (compiler->devinfo->gen >= 6) ra_set_allocate_round_robin(compiler->vec4_reg_set.regs); ralloc_free(compiler->vec4_reg_set.classes); diff --git a/src/util/register_allocate.c b/src/util/register_allocate.c index c9867e3..7c9bf9b 100644 --- a/src/util/register_allocate.c +++ b/src/util/register_allocate.c @@ -183,7 +183,7 @@ struct ra_graph { * using ralloc_free(). */ struct ra_regs * -ra_alloc_reg_set(void *mem_ctx, unsigned int count) +ra_alloc_reg_set(void *mem_ctx, unsigned int count, bool need_conflict_lists) { unsigned int i; struct ra_regs *regs; @@ -197,9 +197,15 @@ ra_alloc_reg_set(void *mem_ctx, unsigned int count) BITSET_WORDS(count)); BITSET_SET(regs->regs[i].conflicts, i); - regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4); - regs->regs[i].conflict_list_size = 4; - regs->regs[i].conflict_list[0] = i; + if (need_conflict_lists) { + regs->regs[i].conflict_list = ralloc_array(regs->regs, + unsigned int, 4); + regs->regs[i].conflict_list_size = 4; + regs->regs[i].conflict_list[0] = i; + } else { + regs->regs[i].conflict_list = NULL; + regs->regs[i].conflict_list_size = 0; + } regs->regs[i].num_conflicts = 1; } @@ -227,12 +233,14 @@ ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2) { struct ra_reg *reg1 = ®s->regs[r1]; - if (reg1->conflict_list_size == reg1->num_conflicts) { - reg1->conflict_list_size *= 2; - reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, - unsigned int, reg1->conflict_list_size); + if (reg1->conflict_list) { + if (reg1->conflict_list_size == reg1->num_conflicts) { + reg1->conflict_list_size *= 2; + reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, + unsigned int, reg1->conflict_list_size); + } + reg1->conflict_list[reg1->num_conflicts++] = r2; } - reg1->conflict_list[reg1->num_conflicts++] = r2; BITSET_SET(reg1->conflicts, r2); } diff --git a/src/util/register_allocate.h b/src/util/register_allocate.h index ed3854c..628d2bb 100644 --- a/src/util/register_allocate.h +++ b/src/util/register_allocate.h @@ -44,7 +44,8 @@ struct ra_regs; * registers, such as aligned register pairs that conflict with the * two real registers from which they are composed. */ -struct ra_regs *ra_alloc_reg_set(void *mem_ctx, unsigned int count); +struct ra_regs *ra_alloc_reg_set(void *mem_ctx, unsigned int count, + bool need_conflict_lists); void ra_set_allocate_round_robin(struct ra_regs *regs); unsigned int ra_alloc_reg_class(struct ra_regs *regs); void ra_add_reg_conflict(struct ra_regs *regs, -- 2.7.4