From 6c34084d8eafff3a764cd1ad49afacd211470f7b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 30 Nov 2016 12:19:38 -0800 Subject: [PATCH] vc4: Try to schedule QIR instructions between writing to and reading math. This helps us get the delay slots between SFU writes and reads filled. total instructions in shared programs: 94494 -> 93970 (-0.55%) instructions in affected programs: 59206 -> 58682 (-0.89%) 3DMMES performance +1.89967% +/- 0.157611% (n=10,9) --- src/gallium/drivers/vc4/vc4_qir_schedule.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c index ea48a85..89e6d1d 100644 --- a/src/gallium/drivers/vc4/vc4_qir_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c @@ -569,6 +569,28 @@ latency_between(struct schedule_node *before, struct schedule_node *after) after->inst->op == QOP_TEX_RESULT) return 100; + switch (before->inst->op) { + case QOP_RCP: + case QOP_RSQ: + case QOP_EXP2: + case QOP_LOG2: + for (int i = 0; i < qir_get_nsrc(after->inst); i++) { + if (after->inst->src[i].file == + before->inst->dst.file && + after->inst->src[i].index == + before->inst->dst.index) { + /* There are two QPU delay slots before we can + * read a math result, which could be up to 4 + * QIR instructions if they packed well. + */ + return 4; + } + } + break; + default: + break; + } + return 1; } -- 2.7.4