From e763e19808a84ae0218117c89864ff50cb6b0d16 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 25 Aug 2016 14:32:47 -0700 Subject: [PATCH] vc4: Add register allocation support for MUL output rotation. We need the source to be in r0-r3, so make a new register class for it. It will be up to the surrounding passes to make sure that the r0-r3 allocation of its source won't conflict with any other class requirements on that temp. --- src/gallium/drivers/vc4/vc4_context.h | 1 + src/gallium/drivers/vc4/vc4_register_allocate.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index c3474a0..63a1dfb 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -283,6 +283,7 @@ struct vc4_context { struct ra_regs *regs; unsigned int reg_class_any; unsigned int reg_class_a_or_b_or_acc; + unsigned int reg_class_r0_r3; unsigned int reg_class_r4_or_a; unsigned int reg_class_a; diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index 203b459..fc44764 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -119,6 +119,7 @@ vc4_alloc_reg_set(struct vc4_context *vc4) vc4->reg_class_a_or_b_or_acc = ra_alloc_reg_class(vc4->regs); vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs); vc4->reg_class_a = ra_alloc_reg_class(vc4->regs); + vc4->reg_class_r0_r3 = ra_alloc_reg_class(vc4->regs); for (uint32_t i = 0; i < ARRAY_SIZE(vc4_regs); i++) { /* Reserve ra31/rb31 for spilling fixup_raddr_conflict() in * vc4_qpu_emit.c @@ -135,6 +136,9 @@ vc4_alloc_reg_set(struct vc4_context *vc4) continue; } + if (vc4_regs[i].mux <= QPU_MUX_R3) + ra_class_add_reg(vc4->regs, vc4->reg_class_r0_r3, i); + ra_class_add_reg(vc4->regs, vc4->reg_class_any, i); ra_class_add_reg(vc4->regs, vc4->reg_class_a_or_b_or_acc, i); } @@ -164,6 +168,7 @@ 
node_to_temp_priority(const void *in_a, const void *in_b) #define CLASS_BIT_A (1 << 0) #define CLASS_BIT_B_OR_ACC (1 << 1) #define CLASS_BIT_R4 (1 << 2) +#define CLASS_BIT_R0_R3 (1 << 4) /** * Returns a mapping from QFILE_TEMP indices to struct qpu_regs. @@ -240,6 +245,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2); break; + case QOP_ROT_MUL: + assert(inst->src[0].file == QFILE_TEMP); + class_bits[inst->src[0].index] &= ~CLASS_BIT_R0_R3; + break; + default: break; } @@ -287,6 +297,9 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) case CLASS_BIT_A: ra_set_node_class(g, node, vc4->reg_class_a); break; + case CLASS_BIT_R0_R3: + ra_set_node_class(g, node, vc4->reg_class_r0_r3); + break; default: fprintf(stderr, "temp %d: bad class bits: 0x%x\n", i, class_bits[i]); -- 2.7.4