vc4: Fix math with a condition flag set.
author Eric Anholt <eric@anholt.net>
Wed, 8 Mar 2017 20:07:16 +0000 (12:07 -0800)
committer Eric Anholt <eric@anholt.net>
Wed, 8 Mar 2017 21:44:17 +0000 (13:44 -0800)
Math results land in r4, regardless of the condition.  To implement
conditional math, we just need to ensure that the results are moved out of
r4 (as often happens anyway, when the value is live across another math
instruction), so that we can attach the condition to the MOV.

Fixes dEQP-GLES2.functional.shaders.random.all_features.fragment.93 and a
couple of others, which were assertion-failing because their conditions
hadn't been handled during the QIR->QPU stage.

src/gallium/drivers/vc4/vc4_qpu_emit.c
src/gallium/drivers/vc4/vc4_register_allocate.c

index 60ca87a..aaa3a04 100644 (file)
@@ -226,10 +226,14 @@ static void
 handle_r4_qpu_write(struct qblock *block, struct qinst *qinst,
                     struct qpu_reg dst)
 {
-        if (dst.mux != QPU_MUX_R4)
+        if (dst.mux != QPU_MUX_R4) {
                 queue(block, qpu_a_MOV(dst, qpu_r4()));
-        else if (qinst->sf)
-                queue(block, qpu_a_MOV(qpu_ra(QPU_W_NOP), qpu_r4()));
+                set_last_cond_add(block, qinst->cond);
+        } else {
+                assert(qinst->cond == QPU_COND_ALWAYS);
+                if (qinst->sf)
+                        queue(block, qpu_a_MOV(qpu_ra(QPU_W_NOP), qpu_r4()));
+        }
 }
 
 static void
@@ -444,6 +448,7 @@ vc4_generate_code_block(struct vc4_compile *c,
                         }
 
                         handle_r4_qpu_write(block, qinst, dst);
+                        handled_qinst_cond = true;
 
                         break;
 
@@ -495,6 +500,7 @@ vc4_generate_code_block(struct vc4_compile *c,
                         *last_inst(block) = qpu_set_sig(*last_inst(block),
                                                         QPU_SIG_COLOR_LOAD);
                         handle_r4_qpu_write(block, qinst, dst);
+                        handled_qinst_cond = true;
                         break;
 
                 case QOP_VARY_ADD_C:
@@ -507,6 +513,7 @@ vc4_generate_code_block(struct vc4_compile *c,
                         *last_inst(block) = qpu_set_sig(*last_inst(block),
                                                         QPU_SIG_LOAD_TMU0);
                         handle_r4_qpu_write(block, qinst, dst);
+                        handled_qinst_cond = true;
                         break;
 
                 case QOP_THRSW:
index e48b8ee..506fdb5 100644 (file)
@@ -256,6 +256,14 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
                                 if (c->temp_start[i] < ip && c->temp_end[i] > ip)
                                         class_bits[i] &= ~CLASS_BIT_R4;
                         }
+
+                        /* If we're doing a conditional write of something
+                         * writing R4 (math, tex results), then make sure that
+                         * we store in a temp so that we actually
+                         * conditionally move the result.
+                         */
+                        if (inst->cond != QPU_COND_ALWAYS)
+                                class_bits[inst->dst.index] &= ~CLASS_BIT_R4;
                 } else {
                         /* R4 can't be written as a general purpose
                          * register. (it's TMU_NOSWAP as a write address).