From 98728ce0718e49864b872beb76fc3afbf341b38a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 5 Aug 2015 20:54:02 -0700 Subject: [PATCH] vc4: Switch QPU_PACK_SCALED to be two non-SSA instructions. total instructions in shared programs: 98159 -> 98136 (-0.02%) instructions in affected programs: 12279 -> 12256 (-0.19%) --- src/gallium/drivers/vc4/vc4_program.c | 5 ++++- src/gallium/drivers/vc4/vc4_qir.c | 3 ++- src/gallium/drivers/vc4/vc4_qir.h | 6 ++++-- src/gallium/drivers/vc4/vc4_qpu_emit.c | 23 +++++++---------------- src/gallium/drivers/vc4/vc4_register_allocate.c | 3 ++- 5 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 303132f..ff41779 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1142,7 +1142,10 @@ emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w) rcp_w)); } - qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1])); + struct qreg packed = qir_get_temp(c); + qir_PACK_16A_I(c, packed, xyi[0]); + qir_PACK_16B_I(c, packed, xyi[1]); + qir_VPM_WRITE(c, packed); } static void diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 3a37451..a7b4bd6 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -76,7 +76,8 @@ static const struct qir_op_info qir_op_info[] = { [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 }, [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 }, [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 }, - [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true }, + [QOP_PACK_16A_I] = { "pack_16a_i", 1, 1 }, + [QOP_PACK_16B_I] = { "pack_16b_i", 1, 1 }, [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true }, [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true }, [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true }, diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index ca93ab8..5e23420 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -104,12 +104,13 @@ enum qop { QOP_LOG2, QOP_VW_SETUP, QOP_VR_SETUP, - QOP_PACK_SCALED, QOP_PACK_8888_F, QOP_PACK_8A_F, QOP_PACK_8B_F, QOP_PACK_8C_F, QOP_PACK_8D_F, + QOP_PACK_16A_I, + QOP_PACK_16B_I, QOP_TLB_DISCARD_SETUP, QOP_TLB_STENCIL_SETUP, QOP_TLB_Z_WRITE, @@ -580,12 +581,13 @@ QIR_ALU1(RCP) QIR_ALU1(RSQ) QIR_ALU1(EXP2) QIR_ALU1(LOG2) -QIR_ALU2(PACK_SCALED) QIR_ALU1(PACK_8888_F) QIR_PACK(PACK_8A_F) QIR_PACK(PACK_8B_F) QIR_PACK(PACK_8C_F) QIR_PACK(PACK_8D_F) +QIR_PACK(PACK_16A_I) +QIR_PACK(PACK_16B_I) QIR_ALU1(VARY_ADD_C) QIR_NODST_2(TEX_S) QIR_NODST_2(TEX_T) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index e89db3e..573a557 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -403,23 +403,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) queue(c, qpu_a_FADD(dst, src[0], qpu_r5())); break; - case QOP_PACK_SCALED: { - uint64_t a = (qpu_a_MOV(dst, src[0]) | - QPU_SET_FIELD(QPU_PACK_A_16A, - QPU_PACK)); - uint64_t b = (qpu_a_MOV(dst, src[1]) | - QPU_SET_FIELD(QPU_PACK_A_16B, - QPU_PACK)); - - if (dst.mux == src[1].mux && dst.addr == src[1].addr) { - queue(c, b); - queue(c, a); - } else { - queue(c, a); - queue(c, b); - } + case QOP_PACK_16A_I: + case QOP_PACK_16B_I: + queue(c, + qpu_a_MOV(dst, src[0]) | + QPU_SET_FIELD(qinst->op == QOP_PACK_16A_I ? + QPU_PACK_A_16A : QPU_PACK_A_16B, + QPU_PACK)); break; - } case QOP_TEX_S: case QOP_TEX_T: diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index fa67a3a..58d812b 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -268,7 +268,8 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2); break; - case QOP_PACK_SCALED: + case QOP_PACK_16A_I: + case QOP_PACK_16B_I: /* The pack flags require an A-file dst register. */ class_bits[inst->dst.index] &= CLASS_BIT_A; break; -- 2.7.4