vc4: Make SF be a flag on the QIR instructions.
author Eric Anholt <eric@anholt.net>
Thu, 12 Feb 2015 22:17:21 +0000 (14:17 -0800)
committer Eric Anholt <eric@anholt.net>
Fri, 13 Feb 2015 00:33:16 +0000 (16:33 -0800)
Right now the places that used to emit a mov.sf just put the SF on the
previous instruction when it generated the source of the SF value.  Even
without optimization to push the sf up further (and thus potentially
kill more MOVs), this gets us:

total uniforms in shared programs: 13455 -> 13457 (0.01%)
uniforms in affected programs:     3 -> 5 (66.67%)
total instructions in shared programs: 40296 -> 40198 (-0.24%)
instructions in affected programs:     12595 -> 12497 (-0.78%)

src/gallium/drivers/vc4/vc4_opt_algebraic.c
src/gallium/drivers/vc4/vc4_opt_cse.c
src/gallium/drivers/vc4/vc4_opt_dead_code.c
src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c

index 994fa90..1e0b8c9 100644 (file)
@@ -149,22 +149,6 @@ qir_opt_algebraic(struct vc4_compile *c)
                         defs[inst->dst.index] = inst;
 
                 switch (inst->op) {
-                case QOP_SF:
-                        /* SF just looks at the sign bit, or whether all the
-                         * bits are 0.  This is preserved across an itof
-                         * transformation.
-                         */
-                        if (inst->src[0].file == QFILE_TEMP &&
-                            defs[inst->src[0].index]->op == QOP_ITOF) {
-                                dump_from(c, inst);
-                                inst->src[0] =
-                                        defs[inst->src[0].index]->src[0];
-                                progress =  true;
-                                dump_to(c, inst);
-                                break;
-                        }
-                        break;
-
                 case QOP_SEL_X_Y_ZS:
                 case QOP_SEL_X_Y_ZC:
                 case QOP_SEL_X_Y_NS:
index c11c90e..71794f7 100644 (file)
@@ -45,7 +45,7 @@ struct inst_key {
         enum qop op;
         struct qreg src[4];
         /**
-         * If the instruction depends on the flags, how many QOP_SFs have been
+         * If the instruction depends on the flags, how many SFs have been
          * seen before this instruction, or if it depends on r4, how many r4
          * writes have been seen.
          */
@@ -122,7 +122,6 @@ qir_opt_cse(struct vc4_compile *c)
 {
         bool progress = false;
         struct simple_node *node, *t;
-        struct qinst *last_sf = NULL;
         uint32_t sf_count = 0, r4_count = 0;
 
         struct hash_table *ht = _mesa_hash_table_create(NULL, NULL,
@@ -135,27 +134,11 @@ qir_opt_cse(struct vc4_compile *c)
 
                 if (qir_has_side_effects(c, inst) ||
                     qir_has_side_effect_reads(c, inst)) {
-                        if (inst->op == QOP_TLB_DISCARD_SETUP)
-                                last_sf = NULL;
                         continue;
                 }
 
-                if (inst->op == QOP_SF) {
-                        if (last_sf &&
-                            qir_reg_equals(last_sf->src[0], inst->src[0])) {
-                                if (debug) {
-                                        fprintf(stderr,
-                                                "Removing redundant SF: ");
-                                        qir_dump_inst(c, inst);
-                                        fprintf(stderr, "\n");
-                                }
-                                qir_remove_instruction(inst);
-                                progress = true;
-                                continue;
-                        } else {
-                                last_sf = inst;
-                                sf_count++;
-                        }
+                if (inst->sf) {
+                        sf_count++;
                 } else {
                         struct qinst *cse = vc4_find_cse(c, ht, inst,
                                                          sf_count, r4_count);
index 94ab382..dd1561d 100644 (file)
@@ -43,6 +43,7 @@ dce(struct vc4_compile *c, struct qinst *inst)
                 qir_dump_inst(c, inst);
                 fprintf(stderr, "\n");
         }
+        assert(!inst->sf);
         qir_remove_instruction(inst);
 }
 
@@ -93,6 +94,7 @@ qir_opt_dead_code(struct vc4_compile *c)
 
                 if (inst->dst.file == QFILE_TEMP &&
                     !used[inst->dst.index] &&
+                    !inst->sf &&
                     (!qir_has_side_effects(c, inst) ||
                      inst->op == QOP_TEX_RESULT) &&
                     !has_nonremovable_reads(c, inst)) {
@@ -120,11 +122,16 @@ qir_opt_dead_code(struct vc4_compile *c)
 
                 if (qir_depends_on_flags(inst))
                         sf_used = true;
-                if (inst->op == QOP_SF) {
+                if (inst->sf) {
                         if (!sf_used) {
-                                dce(c, inst);
+                                if (debug) {
+                                        fprintf(stderr, "Removing SF on: ");
+                                        qir_dump_inst(c, inst);
+                                        fprintf(stderr, "\n");
+                                }
+
+                                inst->sf = false;
                                 progress = true;
-                                continue;
                         }
                         sf_used = false;
                 }
index 0269e32..ba322b6 100644 (file)
@@ -79,7 +79,7 @@ qir_opt_vpm_writes(struct vc4_compile *c)
                 if (qir_is_multi_instruction(inst))
                         continue;
 
-                if (qir_depends_on_flags(inst))
+                if (qir_depends_on_flags(inst) || inst->sf)
                         continue;
 
                 if (qir_has_side_effects(c, inst) ||
@@ -106,6 +106,7 @@ qir_opt_vpm_writes(struct vc4_compile *c)
                 /* Move the generating instruction to the end of the program
                  * to maintain the order of the VPM writes.
                  */
+                assert(!vpm_writes[i]->sf);
                 move_to_tail(&vpm_writes[i]->link, &inst->link);
                 qir_remove_instruction(vpm_writes[i]);
 
index 3f0de2c..9d3d868 100644 (file)
@@ -2163,6 +2163,12 @@ vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum qstage stage,
         }
 
         tgsi_parse_free(&c->parser);
+        if (vc4_debug & VC4_DEBUG_QIR) {
+                fprintf(stderr, "%s prog %d/%d pre-opt QIR:\n",
+                        qir_get_stage_name(c->stage),
+                        c->program_id, c->variant_id);
+                qir_dump(c);
+        }
 
         qir_optimize(c);
 
index feb585d..9e0ee1f 100644 (file)
@@ -59,7 +59,6 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_XOR] = { "xor", 1, 2 },
         [QOP_NOT] = { "not", 1, 1 },
 
-        [QOP_SF] = { "sf", 0, 1 },
         [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1, false, true },
         [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
         [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
@@ -282,7 +281,9 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
 void
 qir_dump_inst(struct vc4_compile *c, struct qinst *inst)
 {
-        fprintf(stderr, "%s ", qir_get_op_name(inst->op));
+        fprintf(stderr, "%s%s ",
+                qir_get_op_name(inst->op),
+                inst->sf ? ".sf" : "");
 
         qir_print_reg(c, inst->dst, true);
         for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
@@ -416,6 +417,20 @@ qir_get_stage_name(enum qstage stage)
         return names[stage];
 }
 
+void
+qir_SF(struct vc4_compile *c, struct qreg src)
+{
+        assert(!is_empty_list(&c->instructions));
+        struct qinst *last_inst = (struct qinst *)c->instructions.prev;
+        if (last_inst->dst.file != src.file ||
+            last_inst->dst.index != src.index ||
+            qir_is_multi_instruction(last_inst)) {
+                src = qir_MOV(c, src);
+                last_inst = (struct qinst *)c->instructions.prev;
+        }
+        last_inst->sf = true;
+}
+
 #define OPTPASS(func)                                                   \
         do {                                                            \
                 bool stage_progress = func(c);                          \
index ee86994..6da6ff6 100644 (file)
@@ -24,6 +24,7 @@
 #ifndef VC4_QIR_H
 #define VC4_QIR_H
 
+#include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdbool.h>
@@ -76,9 +77,6 @@ enum qop {
         QOP_XOR,
         QOP_NOT,
 
-        /* Sets the flag register according to src. */
-        QOP_SF,
-
         /* Note: Orderings of these compares must be the same as in
          * qpu_defines.h.  Selects the src[0] if the ns flag bit is set,
          * otherwise 0. */
@@ -173,6 +171,7 @@ struct qinst {
         enum qop op;
         struct qreg dst;
         struct qreg *src;
+        bool sf;
 };
 
 enum qstage {
@@ -397,6 +396,8 @@ bool qir_opt_vpm_writes(struct vc4_compile *c);
 
 void qpu_schedule_instructions(struct vc4_compile *c);
 
+void qir_SF(struct vc4_compile *c, struct qreg src);
+
 #define QIR_ALU0(name)                                                   \
 static inline struct qreg                                                \
 qir_##name(struct vc4_compile *c)                                        \
@@ -443,7 +444,6 @@ QIR_ALU2(FADD)
 QIR_ALU2(FSUB)
 QIR_ALU2(FMUL)
 QIR_ALU2(MUL24)
-QIR_NODST_1(SF)
 QIR_ALU1(SEL_X_0_ZS)
 QIR_ALU1(SEL_X_0_ZC)
 QIR_ALU1(SEL_X_0_NS)
index 7531be5..eeb8d3a 100644 (file)
@@ -270,11 +270,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                         }
                         break;
 
-                case QOP_SF:
-                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_NOP), src[0]));
-                        *last_inst(c) |= QPU_SF;
-                        break;
-
                 case QOP_SEL_X_0_ZS:
                 case QOP_SEL_X_0_ZC:
                 case QOP_SEL_X_0_NS:
@@ -548,6 +543,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
 
                         break;
                 }
+
+                if (qinst->sf) {
+                        assert(!qir_is_multi_instruction(qinst));
+                        *last_inst(c) |= QPU_SF;
+                }
         }
 
         qpu_schedule_instructions(c);