pan/bi: Switch to new IR
author    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
          Wed, 30 Dec 2020 20:50:50 +0000 (15:50 -0500)
committer Marge Bot <eric+marge@anholt.net>
          Thu, 31 Dec 2020 14:39:02 +0000 (14:39 +0000)
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8135>

.gitlab-ci/deqp-panfrost-g52-fails.txt
src/panfrost/bifrost/bi_liveness.c
src/panfrost/bifrost/bi_opt_dce.c
src/panfrost/bifrost/bi_pack.c
src/panfrost/bifrost/bi_print.c
src/panfrost/bifrost/bi_ra.c
src/panfrost/bifrost/bi_schedule.c
src/panfrost/bifrost/bifrost_compile.c
src/panfrost/bifrost/bir.c
src/panfrost/bifrost/compiler.h

.gitlab-ci/deqp-panfrost-g52-fails.txt
index 94b3837..36ebabf 100644
@@ -14,7 +14,6 @@ dEQP-GLES2.functional.fbo.completeness.renderable.texture.color0.rgb_half_float_
 dEQP-GLES2.functional.fbo.completeness.size.distinct,Fail
 dEQP-GLES2.functional.negative_api.shader.uniform_matrixfv_invalid_transpose,Fail
 dEQP-GLES2.functional.negative_api.texture.generatemipmap_zero_level_array_compressed,Fail
-dEQP-GLES2.functional.shaders.random.all_features.fragment.88,Fail
 dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod,Fail
 dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear,Fail
 dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest,Fail
@@ -38,4 +37,3 @@ dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_repeat,Fail
 dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_clamp,Fail
 dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_mirror,Fail
 dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_repeat,Fail
-dEQP-GLES2.functional.uniform_api.random.79,Fail
src/panfrost/bifrost/bi_liveness.c
index 94275d0..712e77a 100644
 #include "compiler.h"
 
 void
-bi_liveness_ins_update(uint16_t *live, bi_instruction *ins, unsigned max)
+bi_liveness_ins_update(uint16_t *live, bi_instr *ins, unsigned max)
 {
         /* live_in[s] = GEN[s] + (live_out[s] - KILL[s]) */
 
-        pan_liveness_kill(live, ins->dest, max, bi_writemask(ins));
+        pan_liveness_kill(live, bi_get_node(ins->dest[0]), max, bi_writemask_new(ins));
 
         bi_foreach_src(ins, src) {
-                unsigned node = ins->src[src];
-                unsigned bytemask = bi_bytemask_of_read_components(ins, node);
+                unsigned node = bi_get_node(ins->src[src]);
+                unsigned bytemask = bi_bytemask_of_read_components_new(ins, ins->src[src]);
 
                 pan_liveness_gen(live, node, max, bytemask);
         }
@@ -42,7 +42,7 @@ bi_liveness_ins_update(uint16_t *live, bi_instruction *ins, unsigned max)
 static void
 bi_liveness_ins_update_wrap(uint16_t *live, void *ins, unsigned max)
 {
-        bi_liveness_ins_update(live, (bi_instruction *) ins, max);
+        bi_liveness_ins_update(live, (bi_instr *) ins, max);
 }
 
 void
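
For context: the central change in this series is replacing flat unsigned
node indices tagged with BIR_INDEX_* bits by a structured bi_index, which
liveness maps back to flat dataflow nodes via bi_get_node. Below is a
minimal, self-contained sketch of that idea, reconstructed from the
accessors used in this diff; the real definitions live in compiler.h and
carry more fields (swizzle, abs/neg, offset), so treat the names and
layout here as illustrative only.

    #include <stdbool.h>
    #include <stdint.h>

    /* Simplified stand-in for the new bi_index. */
    enum bi_index_type {
            BI_INDEX_NULL = 0,
            BI_INDEX_NORMAL,   /* SSA-like temporary */
            BI_INDEX_REGISTER, /* hardware register, post-RA */
            BI_INDEX_CONSTANT, /* inline 32-bit constant */
            BI_INDEX_PASS,     /* clause-internal passthrough lane */
            BI_INDEX_FAU,      /* fast access uniform word */
    };

    typedef struct {
            uint32_t value;
            enum bi_index_type type;
            bool reg; /* register-class (non-SSA) temporary */
    } bi_index_sketch;

    /* Map an index to a flat node for the pan_liveness_* sets. Only
     * temporaries get nodes; constants, registers, FAU words and null
     * map out of range, so live[] never tracks them. The PAN_IS_REG
     * stride-2 loop in bi_choose_spill_node below suggests that SSA
     * and register-class temps interleave, hence the low bit. */
    static inline unsigned
    bi_get_node_sketch(bi_index_sketch idx)
    {
            if (idx.type != BI_INDEX_NORMAL)
                    return ~0u;

            return (idx.value << 1) | (idx.reg ? 1 : 0);
    }
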
src/panfrost/bifrost/bi_opt_dce.c
index 9a0c039..7310705 100644
@@ -36,12 +36,13 @@ bi_opt_dead_code_eliminate(bi_context *ctx, bi_block *block)
 
         uint16_t *live = mem_dup(block->base.live_out, temp_count * sizeof(uint16_t));
 
-        bi_foreach_instr_in_block_safe_rev(block, ins) {
-                if (ins->dest && !(ins->dest & BIR_SPECIAL)) {
-                        if (!live[ins->dest]) {
-                                bi_remove_instruction(ins);
-                                progress |= true;
-                        }
+        bi_foreach_instr_in_block_safe_rev(block, _ins) {
+                bi_instr *ins = (bi_instr *) _ins;
+                unsigned index = bi_get_node(ins->dest[0]);
+
+                if (index < temp_count && !live[index]) {
+                        bi_remove_instruction((bi_instruction *) ins);
+                        progress |= true;
                 }
 
                 bi_liveness_ins_update(live, ins, temp_count);
src/panfrost/bifrost/bi_pack.c
index 7d276c5..3b5d127 100644
@@ -49,7 +49,7 @@ bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2, bool tdd
                         (next_1 == NULL) ? BIFROST_FLOW_END :
                         clause->flow_control,
                 .terminate_discarded_threads = tdd,
-                .next_clause_prefetch = clause->next_clause_prefetch,
+                .next_clause_prefetch = clause->next_clause_prefetch && next_1,
                 .staging_barrier = clause->staging_barrier,
                 .staging_register = clause->staging_register,
                 .dependency_wait = dependency_wait,
@@ -105,15 +105,16 @@ bi_constant_field(unsigned idx)
 static bool
 bi_assign_fau_idx_single(bi_registers *regs,
                          bi_clause *clause,
-                         bi_instruction *ins,
+                         bi_instr *ins,
                          bool assigned,
                          bool fast_zero)
 {
         if (!ins)
                 return assigned;
 
-        if (ins->type == BI_BRANCH && clause->branch_constant) {
-                /* By convention branch constant is last */
+        if (ins->branch_target && clause->branch_constant) {
+                /* By convention branch constant is last XXX: this whole thing
+                 * is a hack, FIXME */
                 unsigned idx = clause->constant_count - 1;
 
                 /* We can only jump to clauses which are qword aligned so the
@@ -126,18 +127,26 @@ bi_assign_fau_idx_single(bi_registers *regs,
                 if (assigned && regs->fau_idx != C)
                         unreachable("Mismatched fau_idx: branch");
 
+                bi_foreach_src(ins, s) {
+                        if (ins->src[s].type == BI_INDEX_CONSTANT)
+                                ins->src[s] = bi_passthrough(BIFROST_SRC_FAU_HI);
+                }
+
                 regs->fau_idx = C;
                 return true;
         }
 
         bi_foreach_src(ins, s) {
-                if (ins->src[s] & BIR_INDEX_CONSTANT) {
+                if (ins->src[s].type == BI_INDEX_CONSTANT) {
                         bool hi = false;
-                        uint32_t cons = bi_get_immediate(ins, s);
+                        uint32_t cons = ins->src[s].value;
+                        unsigned swizzle = ins->src[s].swizzle;
 
                         /* FMA can encode zero for free */
                         if (cons == 0 && fast_zero) {
-                                ins->src[s] = BIR_INDEX_PASS | BIFROST_SRC_STAGE;
+                                assert(!ins->src[s].abs && !ins->src[s].neg);
+                                ins->src[s] = bi_passthrough(BIFROST_SRC_STAGE);
+                                ins->src[s].swizzle = swizzle;
                                 continue;
                         }
 
@@ -149,16 +158,17 @@ bi_assign_fau_idx_single(bi_registers *regs,
                                 unreachable("Mismatched uniform/const field: imm");
 
                         regs->fau_idx = f;
-                        ins->src[s] = BIR_INDEX_PASS | (hi ? BIFROST_SRC_FAU_HI : BIFROST_SRC_FAU_LO);
+                        ins->src[s] = bi_passthrough(hi ? BIFROST_SRC_FAU_HI : BIFROST_SRC_FAU_LO);
+                        ins->src[s].swizzle = swizzle;
                         assigned = true;
-                } else if (ins->src[s] & BIR_INDEX_FAU) {
-                        unsigned index = ins->src[s] & BIR_FAU_TYPE_MASK;
-                        bool hi = !!(ins->src[s] & BIR_FAU_HI);
-
-                        assert(!assigned || regs->fau_idx == index);
-                        regs->fau_idx = index;
-                        ins->src[s] = BIR_INDEX_PASS |
-                                      (hi ? BIFROST_SRC_FAU_HI : BIFROST_SRC_FAU_LO);
+                } else if (ins->src[s].type == BI_INDEX_FAU) {
+                        bool hi = ins->src[s].offset > 0;
+
+                        assert(!assigned || regs->fau_idx == ins->src[s].value);
+                        assert(ins->src[s].swizzle == BI_SWIZZLE_H01);
+                        regs->fau_idx = ins->src[s].value;
+                        ins->src[s] = bi_passthrough(hi ? BIFROST_SRC_FAU_HI :
+                                        BIFROST_SRC_FAU_LO);
                         assigned = true;
                 }
         }
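
For readers new to FAU: a Bifrost bundle can source a single 64-bit
fast-access-uniform slot (a uniform pair or an embedded clause constant),
and each 32-bit operand then picks a half of it through a passthrough
lane, which is what the rewrites to bi_passthrough(BIFROST_SRC_FAU_LO/HI)
above are doing. A hedged restatement using the index fields from this
diff; the helper name is invented for illustration:

    /* Illustrative only: choose the passthrough lane for a FAU source,
     * assuming offset > 0 marks the high 32-bit word, as the code
     * above does. */
    static enum bifrost_packed_src
    bi_fau_lane_sketch(bi_index src)
    {
            assert(src.type == BI_INDEX_FAU);
            return (src.offset > 0) ? BIFROST_SRC_FAU_HI
                                    : BIFROST_SRC_FAU_LO;
    }
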
@@ -171,43 +181,41 @@ bi_assign_fau_idx(bi_clause *clause,
                   bi_bundle *bundle)
 {
         bool assigned =
-                bi_assign_fau_idx_single(&bundle->regs, clause, bundle->fma, false, true);
+                bi_assign_fau_idx_single(&bundle->regs, clause, (bi_instr *) bundle->fma, false, true);
 
-        bi_assign_fau_idx_single(&bundle->regs, clause, bundle->add, assigned, false);
+        bi_assign_fau_idx_single(&bundle->regs, clause, (bi_instr *) bundle->add, assigned, false);
 }
 
 /* Assigns a slot for reading, before anything is written */
 
 static void
-bi_assign_slot_read(bi_registers *regs, unsigned src)
+bi_assign_slot_read(bi_registers *regs, bi_index src)
 {
         /* We only assign for registers */
-        if (!(src & BIR_INDEX_REGISTER))
+        if (src.type != BI_INDEX_REGISTER)
                 return;
 
-        unsigned reg = src & ~BIR_INDEX_REGISTER;
-
         /* Check if we already assigned the slot */
         for (unsigned i = 0; i <= 1; ++i) {
-                if (regs->slot[i] == reg && regs->enabled[i])
+                if (regs->slot[i] == src.value && regs->enabled[i])
                         return;
         }
 
-        if (regs->slot[2] == reg && regs->slot23.slot2 == BIFROST_OP_READ)
+        if (regs->slot[2] == src.value && regs->slot23.slot2 == BIFROST_OP_READ)
                 return;
 
         /* Assign it now */
 
         for (unsigned i = 0; i <= 1; ++i) {
                 if (!regs->enabled[i]) {
-                        regs->slot[i] = reg;
+                        regs->slot[i] = src.value;
                         regs->enabled[i] = true;
                         return;
                 }
         }
 
         if (!regs->slot23.slot3) {
-                regs->slot[2] = reg;
+                regs->slot[2] = src.value;
                 regs->slot23.slot2 = BIFROST_OP_READ;
                 return;
         }
@@ -223,44 +231,52 @@ bi_assign_slots(bi_bundle *now, bi_bundle *prev)
          * use the data registers, which has its own mechanism entirely
          * and thus gets skipped over here. */
 
-        unsigned read_dreg = now->add &&
-                bi_class_props[now->add->type] & BI_DATA_REG_SRC;
+        bool read_dreg = now->add &&
+                bi_opcode_props[((bi_instr *) now->add)->op].sr_read;
 
-        unsigned write_dreg = prev->add &&
-                bi_class_props[prev->add->type] & BI_DATA_REG_DEST;
+        bool write_dreg = now->add &&
+                bi_opcode_props[((bi_instr *) now->add)->op].sr_write;
 
         /* First, assign reads */
 
         if (now->fma)
                 bi_foreach_src(now->fma, src)
-                        bi_assign_slot_read(&now->regs, now->fma->src[src]);
+                        bi_assign_slot_read(&now->regs, ((bi_instr *) now->fma)->src[src]);
 
         if (now->add) {
                 bi_foreach_src(now->add, src) {
                         if (!(src == 0 && read_dreg))
-                                bi_assign_slot_read(&now->regs, now->add->src[src]);
+                                bi_assign_slot_read(&now->regs, ((bi_instr *) now->add)->src[src]);
                 }
         }
 
-        /* Next, assign writes */
+        /* Next, assign writes. Staging writes are assigned separately, but
+         * +ATEST wants its destination written to both a staging register
+         * _and_ a regular write, because it may not generate a message */
 
-        if (prev->add && prev->add->dest & BIR_INDEX_REGISTER && !write_dreg) {
-                now->regs.slot[3] = prev->add->dest & ~BIR_INDEX_REGISTER;
-                now->regs.slot23.slot3 = BIFROST_OP_WRITE;
-        }
+        if (prev->add && (!write_dreg || ((bi_instr *) prev->add)->op == BI_OPCODE_ATEST)) {
+                bi_index idx = ((bi_instr *) prev->add)->dest[0];
 
-        if (prev->fma && prev->fma->dest & BIR_INDEX_REGISTER) {
-                unsigned r = prev->fma->dest & ~BIR_INDEX_REGISTER;
-
-                if (now->regs.slot23.slot3) {
-                        /* Scheduler constraint: cannot read 3 and write 2 */
-                        assert(!now->regs.slot23.slot2);
-                        now->regs.slot[2] = r;
-                        now->regs.slot23.slot2 = BIFROST_OP_WRITE;
-                } else {
-                        now->regs.slot[3] = r;
+                if (idx.type == BI_INDEX_REGISTER) {
+                        now->regs.slot[3] = idx.value;
                         now->regs.slot23.slot3 = BIFROST_OP_WRITE;
-                        now->regs.slot23.slot3_fma = true;
+                }
+        }
+
+        if (prev->fma) {
+                bi_index idx = ((bi_instr *) prev->fma)->dest[0];
+
+                if (idx.type == BI_INDEX_REGISTER) {
+                        if (now->regs.slot23.slot3) {
+                                /* Scheduler constraint: cannot read 3 and write 2 */
+                                assert(!now->regs.slot23.slot2);
+                                now->regs.slot[2] = idx.value;
+                                now->regs.slot23.slot2 = BIFROST_OP_WRITE;
+                        } else {
+                                now->regs.slot[3] = idx.value;
+                                now->regs.slot23.slot3 = BIFROST_OP_WRITE;
+                                now->regs.slot23.slot3_fma = true;
+                        }
                 }
         }
 
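
As a worked example of the slot model used above (slots 0 and 1 are read
ports, slot 2 may read or write, slot 3 only writes): a bundle whose FMA
reads r4 and r5 while the previous bundle's FMA wrote r6 would be
assigned roughly as follows. Values are illustrative:

    now->regs.slot[0] = 4;  now->regs.enabled[0] = true;   /* read r4  */
    now->regs.slot[1] = 5;  now->regs.enabled[1] = true;   /* read r5  */
    now->regs.slot[3] = 6;                                 /* write r6 */
    now->regs.slot23.slot3 = BIFROST_OP_WRITE;
    now->regs.slot23.slot3_fma = true;
    /* Had slot 3 already been taken, the write would fall back to
     * slot 2, which the scheduler then guarantees is not also read
     * (see the assert above). */
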
@@ -934,39 +950,65 @@ bi_flip_slots(bi_registers *regs)
 static void
 bi_lower_cubeface2(bi_context *ctx, bi_bundle *bundle)
 {
+        bi_instr *old = (bi_instr *) bundle->add;
+
         /* Filter for +CUBEFACE2 */
-        if (!bundle->add || bundle->add->type != BI_SPECIAL_ADD
-                         || bundle->add->op.special != BI_SPECIAL_CUBEFACE2) {
+        if (!old || old->op != BI_OPCODE_CUBEFACE2)
                 return;
-        }
 
         /* This won't be used once we emit non-singletons, for now this is just
          * a fact of our scheduler and allows us to clobber FMA */
         assert(!bundle->fma);
 
         /* Construct an FMA op */
-        bi_instruction cubeface1 = {
-                .type = BI_SPECIAL_FMA,
-                .op.special = BI_SPECIAL_CUBEFACE1,
-                /* no dest, just to a temporary */
-                .dest_type = nir_type_float32,
-                .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 },
-        };
-
-        /* Copy over the register allocated sources (coordinates). */
-        memcpy(&cubeface1.src, bundle->add->src, sizeof(cubeface1.src));
-
-        /* Zeroed by RA since this is all 32-bit */
-        for (unsigned i = 0; i < 3; ++i)
-                assert(bundle->add->swizzle[i][0] == 0);
+        bi_instr *new = rzalloc(ctx, bi_instr);
+        new->op = BI_OPCODE_CUBEFACE1;
+        /* no dest, just a temporary */
+        new->src[0] = old->src[0];
+        new->src[1] = old->src[1];
+        new->src[2] = old->src[2];
 
         /* Emit the instruction */
-        bundle->fma = bi_emit_before(ctx, bundle->add, cubeface1);
+        list_addtail(&new->link, &old->link);
+        bundle->fma = (bi_instruction *) new;
 
         /* Now replace the sources of the CUBEFACE2 with a single passthrough
          * from the CUBEFACE1 (and a side-channel) */
-        bundle->add->src[0] = BIR_INDEX_PASS | BIFROST_SRC_STAGE;
-        bundle->add->src[1] = bundle->add->src[2] = 0;
+        old->src[0] = bi_passthrough(BIFROST_SRC_STAGE);
+        old->src[1] = old->src[2] = bi_null();
+}
+
+static inline enum bifrost_packed_src
+bi_get_src_slot(bi_registers *regs, unsigned reg)
+{
+        if (regs->slot[0] == reg && regs->enabled[0])
+                return BIFROST_SRC_PORT0;
+        else if (regs->slot[1] == reg && regs->enabled[1])
+                return BIFROST_SRC_PORT1;
+        else if (regs->slot[2] == reg && regs->slot23.slot2 == BIFROST_OP_READ)
+                return BIFROST_SRC_PORT2;
+        else
+                unreachable("Tried to access register with no port");
+}
+
+static inline enum bifrost_packed_src
+bi_get_src_new(bi_instr *ins, bi_registers *regs, unsigned s)
+{
+        if (!ins)
+                return 0;
+
+        bi_index src = ins->src[s];
+
+        if (src.type == BI_INDEX_REGISTER)
+                return bi_get_src_slot(regs, src.value);
+        else if (src.type == BI_INDEX_PASS)
+                return src.value;
+        else if (bi_is_null(src) && ins->op == BI_OPCODE_ZS_EMIT && s < 2)
+                return BIFROST_SRC_STAGE;
+        else {
+                /* TODO make safer */
+                return BIFROST_SRC_STAGE;
+        }
 }
 
 static struct bi_packed_bundle
@@ -978,9 +1020,38 @@ bi_pack_bundle(bi_clause *clause, bi_bundle bundle, bi_bundle prev, bool first_b
 
         bi_flip_slots(&bundle.regs);
 
+        bool sr_read = bundle.add &&
+                bi_opcode_props[((bi_instr *) bundle.add)->op].sr_read;
+
         uint64_t reg = bi_pack_registers(bundle.regs);
-        uint64_t fma = pan_pack_fma(clause, bundle, &bundle.regs);
-        uint64_t add = pan_pack_add(clause, bundle, &bundle.regs, stage);
+        uint64_t fma = bi_pack_fma((bi_instr *) bundle.fma,
+                        bi_get_src_new((bi_instr *) bundle.fma, &bundle.regs, 0),
+                        bi_get_src_new((bi_instr *) bundle.fma, &bundle.regs, 1),
+                        bi_get_src_new((bi_instr *) bundle.fma, &bundle.regs, 2),
+                        bi_get_src_new((bi_instr *) bundle.fma, &bundle.regs, 3));
+
+        uint64_t add = bi_pack_add((bi_instr *) bundle.add,
+                        bi_get_src_new((bi_instr *) bundle.add, &bundle.regs, sr_read + 0),
+                        bi_get_src_new((bi_instr *) bundle.add, &bundle.regs, sr_read + 1),
+                        bi_get_src_new((bi_instr *) bundle.add, &bundle.regs, sr_read + 2),
+                        0);
+
+        if (bundle.add) {
+                bi_instr *add = (bi_instr *) bundle.add;
+
+                bool sr_write = bi_opcode_props[add->op].sr_write;
+
+                if (sr_read) {
+                        assert(add->src[0].type == BI_INDEX_REGISTER);
+                        clause->staging_register = add->src[0].value;
+
+                        if (sr_write)
+                                assert(bi_is_equiv(add->src[0], add->dest[0]));
+                } else if (sr_write) {
+                        assert(add->dest[0].type == BI_INDEX_REGISTER);
+                        clause->staging_register = add->dest[0].value;
+                }
+        }
 
         struct bi_packed_bundle packed = {
                 .lo = reg | (fma << 35) | ((add & 0b111111) << 58),
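
The shifts in the packed struct reflect the 78-bit bundle layout: 35 bits
of register control, a 23-bit FMA word, and a 20-bit ADD word whose low
6 bits complete .lo. The .hi half falls outside this hunk; the sketch
below assumes it carries the remaining ADD bits:

    /* Assumed layout, reconstructed from the shifts above:
     *   lo[34:0]  register control
     *   lo[57:35] FMA word (23 bits)
     *   lo[63:58] ADD word, low 6 bits
     *   hi        remaining 14 ADD bits (assumption, not in this hunk)
     */
    packed.hi = add >> 6;
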
@@ -1022,8 +1093,8 @@ bi_pack_constants(bi_context *ctx, bi_clause *clause,
 
         /* Compute branch offset instead of a dummy 0 */
         if (branches) {
-                bi_instruction *br = clause->bundles[clause->bundle_count - 1].add;
-                assert(br && br->type == BI_BRANCH && br->branch_target);
+                bi_instr *br = (bi_instr *) clause->bundles[clause->bundle_count - 1].add;
+                assert(br && br->branch_target);
 
                 /* Put it in the high place */
                 int32_t qwords = bi_block_offset(ctx, clause, br->branch_target);
@@ -1074,7 +1145,7 @@ bi_pack_clause(bi_context *ctx, bi_clause *clause,
                 struct util_dynarray *emission, gl_shader_stage stage,
                 bool tdd)
 {
-        /* After the deadline lowering */
+        /* TODO After the deadline lowering */
         bi_lower_cubeface2(ctx, &clause->bundles[0]);
 
         struct bi_packed_bundle ins_1 = bi_pack_bundle(clause, clause->bundles[0], clause->bundles[0], true, stage);
@@ -1148,9 +1219,9 @@ bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,
                 return;
 
         const bi_bundle *bundle = &clause->bundles[clause->bundle_count - 1];
-        const bi_instruction *ins = bundle->add;
+        const bi_instr *ins = (bi_instr *) bundle->add;
 
-        if (!ins || ins->type != BI_BLEND)
+        if (!ins || ins->op != BI_OPCODE_BLEND)
                 return;
 
         /* We don't support non-terminal blend instructions yet.
@@ -1160,11 +1231,13 @@ bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,
          */
         assert(0);
 
+#if 0
         assert(ins->blend_location < ARRAY_SIZE(ctx->blend_ret_offsets));
         assert(!ctx->blend_ret_offsets[ins->blend_location]);
         ctx->blend_ret_offsets[ins->blend_location] =
                 util_dynarray_num_elements(emission, uint8_t);
         assert(!(ctx->blend_ret_offsets[ins->blend_location] & 0x7));
+#endif
 }
 
 void
src/panfrost/bifrost/bi_print.c
index 1785bf0..d897198 100644
@@ -480,7 +480,7 @@ bi_print_bundle(bi_bundle *bundle, FILE *fp)
 
         for (unsigned i = 0; i < 2; ++i) {
                 if (ins[i])
-                        bi_print_instruction(ins[i], fp);
+                        bi_print_instr((bi_instr *) ins[i], fp);
                 else
                         fprintf(fp, "nop\n");
         }
@@ -536,7 +536,7 @@ bi_print_block(bi_block *block, FILE *fp)
                         bi_print_clause(clause, fp);
         } else {
                 bi_foreach_instr_in_block(block, ins)
-                        bi_print_instruction(ins, fp);
+                        bi_print_instr((bi_instr *) ins, fp);
         }
 
         fprintf(fp, "}");
src/panfrost/bifrost/bi_ra.c
index ab768aa..c14627b 100644
@@ -26,6 +26,7 @@
 
 #include "compiler.h"
 #include "bi_print.h"
+#include "bi_builder.h"
 #include "panfrost/util/lcra.h"
 #include "util/u_memory.h"
 
@@ -38,14 +39,18 @@ bi_compute_interference(bi_context *ctx, struct lcra_state *l)
                 bi_block *blk = (bi_block *) _blk;
                 uint16_t *live = mem_dup(_blk->live_out, l->node_count * sizeof(uint16_t));
 
-                bi_foreach_instr_in_block_rev(blk, ins) {
+                bi_foreach_instr_in_block_rev(blk, _ins) {
                         /* Mark all registers live after the instruction as
                          * interfering with the destination */
 
-                        if (ins->dest && (ins->dest < l->node_count)) {
+                        bi_instr *ins = (bi_instr *) _ins;
+                        for (unsigned d = 0; d < ARRAY_SIZE(ins->dest); ++d) {
+                                if (bi_get_node(ins->dest[d]) >= l->node_count)
+                                        continue;
+
                                 for (unsigned i = 1; i < l->node_count; ++i) {
                                         if (live[i])
-                                                lcra_add_node_interference(l, ins->dest, bi_writemask(ins), i, live[i]);
+                                                lcra_add_node_interference(l, bi_get_node(ins->dest[d]), bi_writemask_new(ins), i, live[i]);
                                 }
                         }
 
@@ -76,15 +81,19 @@ bi_allocate_registers(bi_context *ctx, bool *success)
         } else {
                 /* R0 - R63, all 32-bit */
                 l->class_start[BI_REG_CLASS_WORK] = 0;
-                l->class_size[BI_REG_CLASS_WORK] = 63 * 4;
+                l->class_size[BI_REG_CLASS_WORK] = 59 * 4;
         }
 
-        bi_foreach_instr_global(ctx, ins) {
-                unsigned dest = ins->dest;
+        bi_foreach_instr_global(ctx, _ins) {
+                bi_instr *ins = (bi_instr *) _ins;
+                unsigned dest = bi_get_node(ins->dest[0]);
 
                 /* Blend shaders expect the src colour to be in r0-r3 */
-                if (ins->type == BI_BLEND && !ctx->is_blend)
-                        l->solutions[ins->src[0]] = 0;
+                if (ins->op == BI_OPCODE_BLEND && !ctx->is_blend) {
+                        unsigned node = bi_get_node(ins->src[0]);
+                        assert(node < node_count);
+                        l->solutions[node] = 0;
+                }
 
                 if (!dest || (dest >= node_count))
                         continue;
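
Note on the BLEND pin above: LCRA solutions are byte offsets into the
register file (hence the divide-by-4 in bi_reg_from_index below), so
forcing a node's solution to 0 places it at r0, and a vec4 source colour
then occupies r0-r3. Illustratively:

    l->solutions[node] = 0;  /* byte offset 0 -> r0 */
    /* offset 4 would mean r1, 8 -> r2, 12 -> r3, ... */
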
@@ -102,87 +111,61 @@ bi_allocate_registers(bi_context *ctx, bool *success)
         return l;
 }
 
-static unsigned
-bi_reg_from_index(struct lcra_state *l, unsigned index, unsigned offset)
+static bi_index
+bi_reg_from_index(struct lcra_state *l, bi_index index)
 {
+        /* Offsets can only be applied when we register allocated an index, or
+         * alternatively for FAU's encoding */
+
+        ASSERTED bool is_offset = (index.offset > 0) &&
+                (index.type != BI_INDEX_FAU);
+
         /* Did we run RA for this index at all */
-        if (index >= l->node_count)
+        if (bi_get_node(index) >= l->node_count) {
+                assert(!is_offset);
                 return index;
+        }
 
         /* LCRA didn't bother solving this index (how lazy!) */
-        signed solution = l->solutions[index];
-        if (solution < 0)
+        signed solution = l->solutions[bi_get_node(index)];
+        if (solution < 0) {
+                assert(!is_offset);
                 return index;
+        }
 
         assert((solution & 0x3) == 0);
         unsigned reg = solution / 4;
-        reg += offset;
-
-        return BIR_INDEX_REGISTER | reg;
-}
-
-static void
-bi_adjust_src_ra(bi_instruction *ins, struct lcra_state *l, unsigned src)
-{
-        if (ins->src[src] >= l->node_count)
-                return;
-
-        bool vector = (bi_class_props[ins->type] & BI_VECTOR) && src == 0;
-        unsigned offset = 0;
-
-        if (vector) {
-                /* TODO: Do we do anything here? */
-        } else {
-                /* Use the swizzle as component select */
-                unsigned components = bi_get_component_count(ins, src);
-
-                nir_alu_type T = ins->src_types[src];
-                unsigned size = nir_alu_type_get_type_size(T);
-                unsigned components_per_word = MAX2(32 / size, 1);
-
-                for (unsigned i = 0; i < components; ++i) {
-                        unsigned off = ins->swizzle[src][i] / components_per_word;
-
-                        /* We can't cross register boundaries in a swizzle */
-                        if (i == 0)
-                                offset = off;
-                        else
-                                assert(off == offset);
-
-                        ins->swizzle[src][i] %= components_per_word;
-                }
-        }
-
-        ins->src[src] = bi_reg_from_index(l, ins->src[src], offset);
-}
-
-static void
-bi_adjust_dest_ra(bi_instruction *ins, struct lcra_state *l)
-{
-        if (ins->dest >= l->node_count)
-                return;
-
-        ins->dest = bi_reg_from_index(l, ins->dest, ins->dest_offset);
-        ins->dest_offset = 0;
+        reg += index.offset;
+
+        /* todo: do we want to compose with the subword swizzle? */
+        bi_index new_index = bi_register(reg);
+        new_index.swizzle = index.swizzle;
+        new_index.abs = index.abs;
+        new_index.neg = index.neg;
+        return new_index;
 }
 
 static void
 bi_install_registers(bi_context *ctx, struct lcra_state *l)
 {
-        bi_foreach_instr_global(ctx, ins) {
-                bi_adjust_dest_ra(ins, l);
+        bi_foreach_instr_global(ctx, _ins) {
+                bi_instr *ins = (bi_instr *) _ins;
+                ins->dest[0] = bi_reg_from_index(l, ins->dest[0]);
 
                 bi_foreach_src(ins, s)
-                        bi_adjust_src_ra(ins, l, s);
+                        ins->src[s] = bi_reg_from_index(l, ins->src[s]);
         }
 }
 
 static void
-bi_rewrite_index_src_single(bi_instruction *ins, unsigned old, unsigned new)
+bi_rewrite_index_src_single(bi_instr *ins, bi_index old, bi_index new)
 {
         bi_foreach_src(ins, i) {
-                if (ins->src[i] == old)
-                        ins->src[i] = new;
+                if (bi_is_equiv(ins->src[i], old)) {
+                        ins->src[i].type = new.type;
+                        ins->src[i].reg = new.reg;
+                        ins->src[i].value = new.value;
+                }
         }
 }
 
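
A worked example of the translation above, assuming bi_register(n)
builds a BI_INDEX_REGISTER index for r<n> as it does elsewhere in this
diff: a node solved at byte offset 20 whose index carries offset 1 (the
high word of a 64-bit value) lands in r6.

    signed solution = 20;             /* from l->solutions[]        */
    unsigned reg = solution / 4;      /* r5; 4-byte aligned         */
    reg += 1;                         /* index.offset, high word    */
    bi_index out = bi_register(reg);  /* r6, swizzle/abs/neg copied */
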
@@ -279,9 +262,12 @@ bi_choose_spill_node(bi_context *ctx, struct lcra_state *l)
 {
         /* Pick a node satisfying bi_spill_register's preconditions */
 
-        bi_foreach_instr_global(ctx, ins) {
-                if (ins->no_spill)
-                        lcra_set_node_spill_cost(l, ins->dest, -1);
+        bi_foreach_instr_global(ctx, _ins) {
+                bi_instr *ins = (bi_instr *) _ins;
+                if (ins->no_spill || ins->dest[0].offset || !bi_is_null(ins->dest[1])) {
+                        for (unsigned d = 0; d < ARRAY_SIZE(ins->dest); ++d)
+                                lcra_set_node_spill_cost(l, bi_get_node(ins->dest[0]), -1);
+                }
         }
 
         for (unsigned i = PAN_IS_REG; i < l->node_count; i += 2)
@@ -290,54 +276,75 @@ bi_choose_spill_node(bi_context *ctx, struct lcra_state *l)
         return lcra_get_best_spill_node(l);
 }
 
+static void
+bi_spill_dest(bi_builder *b, bi_index index, uint32_t offset,
+                bi_clause *clause, bi_block *block, bi_instr *ins,
+                uint32_t *channels)
+{
+        ins->dest[0] = bi_temp(b->shader);
+        ins->no_spill = true;
+
+        unsigned newc = util_last_bit(bi_writemask_new(ins)) >> 2;
+        *channels = MAX2(*channels, newc);
+
+        b->cursor = bi_after_instr(ins);
+
+        bi_instr *st = bi_store_to(b, (*channels) * 32, bi_null(),
+                        ins->dest[0], bi_imm_u32(offset), bi_zero(),
+                        BI_SEG_TL);
+
+        bi_clause *singleton = bi_singleton(b->shader, st, block, 0, (1 << 0),
+                        true);
+
+        list_add(&singleton->link, &clause->link);
+        b->shader->spills++;
+}
+
+static void
+bi_fill_src(bi_builder *b, bi_index index, uint32_t offset, bi_clause *clause,
+                bi_block *block, bi_instr *ins, unsigned channels)
+{
+        bi_index temp = bi_temp(b->shader);
+
+        b->cursor = bi_before_instr(ins);
+        bi_instr *ld = bi_load_to(b, channels * 32, temp, bi_imm_u32(offset),
+                        bi_zero(), BI_SEG_TL);
+        ld->no_spill = true;
+
+        bi_clause *singleton = bi_singleton(b->shader, ld, block, 0,
+                        (1 << 0), true);
+
+        list_addtail(&singleton->link, &clause->link);
+
+        /* Rewrite to use */
+        bi_rewrite_index_src_single((bi_instr *) ins, index, temp);
+        b->shader->fills++;
+}
+
 /* Once we've chosen a spill node, spill it. Precondition: node is a valid
  * SSA node in the non-optimized scheduled IR that was not already
  * spilled (enforced by bi_choose_spill_node). Returns bytes spilled */
 
 static unsigned
-bi_spill_register(bi_context *ctx, unsigned node, unsigned offset)
+bi_spill_register(bi_context *ctx, bi_index index, uint32_t offset)
 {
-        assert(!(node & PAN_IS_REG));
+        assert(!index.reg);
 
+        bi_builder _b = { .shader = ctx };
         unsigned channels = 1;
 
-        /* Spill after every store */
+        /* Spill after every store, fill before every load */
         bi_foreach_block(ctx, _block) {
                 bi_block *block = (bi_block *) _block;
                 bi_foreach_clause_in_block_safe(block, clause) {
-                        bi_instruction *ins = bi_unwrap_singleton(clause);
-
-                        if (ins->dest != node) continue;
-
-                        ins->dest = bi_make_temp(ctx);
-                        ins->no_spill = true;
-                        channels = MAX2(channels, ins->vector_channels);
-
-                        bi_instruction st = bi_spill(ins->dest, offset, channels);
-                        bi_insert_singleton(ctx, clause, block, st, false);
-                        ctx->spills++;
-                }
-        }
-
-        /* Fill before every use */
-        bi_foreach_block(ctx, _block) {
-                bi_block *block = (bi_block *) _block;
-                bi_foreach_clause_in_block_safe(block, clause) {
-                        bi_instruction *ins = bi_unwrap_singleton(clause);
-                        if (!bi_has_arg(ins, node)) continue;
-
-                        /* Don't rewrite spills themselves */
-                        if (ins->segment == BI_SEG_TL) continue;
-
-                        unsigned index = bi_make_temp(ctx);
-
-                        bi_instruction ld = bi_fill(index, offset, channels);
-                        ld.no_spill = true;
-                        bi_insert_singleton(ctx, clause, block, ld, true);
+                        bi_instr *ins = (bi_instr *) bi_unwrap_singleton(clause);
+                        if (bi_is_equiv(ins->dest[0], index)) {
+                                bi_spill_dest(&_b, index, offset, clause,
+                                                block, ins, &channels);
+                        }
 
-                        /* Rewrite to use */
-                        bi_rewrite_index_src_single(ins, node, index);
-                        ctx->fills++;
+                        if (bi_has_arg(ins, index))
+                                bi_fill_src(&_b, index, offset, clause, block, ins, channels);
                 }
         }
 
@@ -350,36 +357,23 @@ bi_register_allocate(bi_context *ctx)
         struct lcra_state *l = NULL;
         bool success = false;
 
-        unsigned iter_count = 100; /* max iterations */
+        unsigned iter_count = 1000; /* max iterations */
 
         /* Number of bytes of memory we've spilled into */
         unsigned spill_count = 0;
 
-        /* For instructions that both read and write from a data register, it's
-         * the *same* data register. We enforce that constraint by just doing a
-         * quick rewrite. TODO: are there cases where this causes RA to have no
-         * solutions due to copyprop? */
-        bi_foreach_instr_global(ctx, ins) {
-                unsigned props = bi_class_props[ins->type];
-                unsigned both = BI_DATA_REG_SRC | BI_DATA_REG_DEST;
-                if ((props & both) != both) continue;
-
-                assert(ins->src[0] & PAN_IS_REG);
-                bi_rewrite_uses(ctx, ins->dest, 0, ins->src[0], 0);
-                ins->dest = ins->src[0];
-        }
-
         do {
                 if (l) {
                         signed spill_node = bi_choose_spill_node(ctx, l);
                         lcra_free(l);
                         l = NULL;
 
-
                         if (spill_node == -1)
                                 unreachable("Failed to choose spill node\n");
 
-                        spill_count += bi_spill_register(ctx, spill_node, spill_count);
+                        spill_count += bi_spill_register(ctx,
+                                        bi_node_to_index(spill_node, bi_max_temp(ctx)),
+                                        spill_count);
                 }
 
                 bi_invalidate_liveness(ctx);
src/panfrost/bifrost/bi_schedule.c
index 96613b6..03a0ca7 100644
@@ -341,9 +341,9 @@ bi_schedule(bi_context *ctx)
 
                 bi_foreach_instr_in_block(bblock, ins) {
                         /* Convenient time to lower */
-                        bi_lower_fmov(ins);
+//                        bi_lower_fmov(ins);
 
-                        bi_clause *u = bi_make_singleton(ctx, ins,
+                        bi_clause *u = bi_singleton(ctx, (bi_instr *) ins,
                                         bblock, 0, (1 << 0),
                                         !is_first);
 
src/panfrost/bifrost/bifrost_compile.c
index dffbe32..54305f0 100644
@@ -3703,8 +3703,10 @@ emit_block(bi_context *ctx, nir_block *block)
         list_addtail(&ctx->current_block->base.link, &ctx->blocks);
         list_inithead(&ctx->current_block->base.instructions);
 
+        bi_builder _b = bi_init_builder(ctx);
+
         nir_foreach_instr(instr, block) {
-                emit_instr(ctx, instr);
+                bi_emit_instr(&_b, instr);
                 ++ctx->instruction_count;
         }
 
@@ -3767,16 +3769,13 @@ emit_if(bi_context *ctx, nir_if *nif)
         bi_block *before_block = ctx->current_block;
 
         /* Speculatively emit the branch, but we can't fill it in until later */
-        bi_instruction *then_branch = bi_emit_branch(ctx);
-        bi_set_branch_cond(then_branch, &nif->condition, true);
+        bi_builder _b = bi_init_builder(ctx);
+        bi_instr *then_branch = bi_branch(&_b, &nif->condition, true);
 
         /* Emit the two subblocks. */
         bi_block *then_block = emit_cf_list(ctx, &nif->then_list);
         bi_block *end_then_block = ctx->current_block;
 
-        /* Emit a jump from the end of the then block to the end of the else */
-        bi_instruction *then_exit = bi_emit_branch(ctx);
-
         /* Emit second block, and check if it's empty */
 
         int count_in = ctx->instruction_count;
@@ -3790,13 +3789,15 @@ emit_if(bi_context *ctx, nir_if *nif)
         assert(else_block);
 
         if (ctx->instruction_count == count_in) {
-                /* The else block is empty, so don't emit an exit jump */
-                bi_remove_instruction(then_exit);
                 then_branch->branch_target = ctx->after_block;
                 pan_block_add_successor(&end_then_block->base, &ctx->after_block->base); /* fallthrough */
         } else {
                 then_branch->branch_target = else_block;
-                then_exit->branch_target = ctx->after_block;
+
+                /* Emit a jump from the end of the then block to the end of the else */
+                _b.cursor = bi_after_block(end_then_block);
+                bi_instr *then_exit = bi_jump(&_b, ctx->after_block);
+
                 pan_block_add_successor(&end_then_block->base, &then_exit->branch_target->base);
                 pan_block_add_successor(&end_else_block->base, &ctx->after_block->base); /* fallthrough */
         }
@@ -3822,8 +3823,8 @@ emit_loop(bi_context *ctx, nir_loop *nloop)
         emit_cf_list(ctx, &nloop->body);
 
         /* Branch back to loop back */
-        bi_instruction *br_back = bi_emit_branch(ctx);
-        br_back->branch_target = ctx->continue_block;
+        bi_builder _b = bi_init_builder(ctx);
+        bi_jump(&_b, ctx->continue_block);
         pan_block_add_successor(&start_block->base, &ctx->continue_block->base);
         pan_block_add_successor(&ctx->current_block->base, &ctx->continue_block->base);
 
@@ -4130,8 +4131,6 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                 /* Name blocks now that we're done emitting so the order is
                  * consistent */
                 block->base.name = block_source_count++;
-
-                bi_lower_combine(ctx, block);
         }
 
         bool progress = false;
@@ -4145,6 +4144,11 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
                 }
         } while(progress);
 
+        bi_foreach_block(ctx, _block) {
+                bi_block *block = (bi_block *) _block;
+                bi_lower_fau(ctx, block);
+        }
+
         if (bifrost_debug & BIFROST_DBG_SHADERS && !nir->info.internal)
                 bi_print_shader(ctx, stdout);
         bi_schedule(ctx);
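
Emission now flows through a bi_builder with an explicit insertion
cursor instead of the old bi_emit_* helpers; emit_if and emit_loop above
show the pattern. A minimal sketch using only the builder calls visible
in this diff:

    bi_builder b = bi_init_builder(ctx);

    /* Position the cursor, then emit: helpers like bi_jump() insert
     * at the cursor and return the new bi_instr, so branch targets
     * can be patched afterwards (cf. then_branch above). */
    b.cursor = bi_after_block(end_then_block);
    bi_instr *jump = bi_jump(&b, ctx->after_block);
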
src/panfrost/bifrost/bir.c
index 8fa89f6..2992a70 100644
@@ -63,13 +63,13 @@ bi_is_src_swizzled(bi_instruction *ins, unsigned s)
 }
 
 bool
-bi_has_arg(bi_instruction *ins, unsigned arg)
+bi_has_arg(bi_instr *ins, bi_index arg)
 {
         if (!ins)
                 return false;
 
         bi_foreach_src(ins, s) {
-                if (ins->src[s] == arg)
+                if (bi_is_equiv(ins->src[s], arg))
                         return true;
         }
 
src/panfrost/bifrost/compiler.h
index 0debc16..b2d4f03 100644
@@ -1070,7 +1070,7 @@ void bi_emit_deriv(bi_context *ctx, nir_alu_instr *instr);
 bool bi_has_clamp(bi_instruction *ins);
 bool bi_has_source_mods(bi_instruction *ins);
 bool bi_is_src_swizzled(bi_instruction *ins, unsigned s);
-bool bi_has_arg(bi_instruction *ins, unsigned arg);
+bool bi_has_arg(bi_instr *ins, bi_index arg);
 uint16_t bi_from_bytemask(uint16_t bytemask, unsigned bytes);
 unsigned bi_get_component_count(bi_instruction *ins, signed s);
 uint16_t bi_bytemask_of_read_components(bi_instruction *ins, unsigned node);
@@ -1106,7 +1106,7 @@ bi_clause *bi_make_singleton(void *memctx, bi_instruction *ins,
 /* Liveness */
 
 void bi_compute_liveness(bi_context *ctx);
-void bi_liveness_ins_update(uint16_t *live, bi_instruction *ins, unsigned max);
+void bi_liveness_ins_update(uint16_t *live, bi_instr *ins, unsigned max);
 void bi_invalidate_liveness(bi_context *ctx);
 
 /* Layout */
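
During the transition the header declares both IRs side by side, which
is why the passes above cast so freely between bi_instruction * and
bi_instr *: both live on the same per-block instruction list. The sketch
below states the assumption that makes those casts tolerable; the actual
structs are in compiler.h:

    /* Assumption: both structs begin with the same intrusive list
     * link, so a node pulled off the list can be viewed as either IR
     * while the legacy one is phased out. */
    typedef struct {
            struct list_head link; /* must come first */
            /* ... legacy bi_instruction fields ... */
    } bi_instruction_sketch;

    typedef struct {
            struct list_head link; /* must come first */
            /* ... new IR fields: op, dest[], src[], ... */
    } bi_instr_sketch;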