pan/bi: Switch to new scheduler
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Wed, 6 Jan 2021 20:02:28 +0000 (15:02 -0500)
committer Marge Bot <eric+marge@anholt.net>
Mon, 8 Feb 2021 14:07:29 +0000 (14:07 +0000)
Delete the old.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8354>

src/panfrost/bifrost/bi_pack.c
src/panfrost/bifrost/bi_ra.c
src/panfrost/bifrost/bi_schedule.c
src/panfrost/bifrost/bifrost_compile.c
src/panfrost/bifrost/compiler.h

index df3f09030fbaa4679a3e2b31cfd53a433b3cd1ce..2bac35d5e55959866fc245cb8e9477d78d604e5e 100644 (file)
@@ -463,7 +463,7 @@ static struct bi_packed_tuple
 bi_pack_tuple(bi_clause *clause, bi_tuple *tuple, bi_tuple *prev, bool first_tuple, gl_shader_stage stage)
 {
         bi_assign_slots(tuple, prev);
-        bi_assign_fau_idx(clause, tuple);
+        tuple->regs.fau_idx = tuple->fau_idx;
         tuple->regs.first_instruction = first_tuple;
 
         bi_flip_slots(&tuple->regs);
@@ -509,36 +509,54 @@ bi_pack_tuple(bi_clause *clause, bi_tuple *tuple, bi_tuple *prev, bool first_tup
         return packed;
 }
 
-/* Packs the next two constants as a dedicated constant quadword at the end of
- * the clause, returning the number packed. There are two cases to consider:
- *
- * Case #1: Branching is not used. For a single constant copy the upper nibble
- * over, easy.
- *
- * Case #2: Branching is used. For a single constant, it suffices to set the
- * upper nibble to 4 and leave the latter constant 0, which matches what the
- * blob does.
- *
- * Extending to multiple constants is considerably more tricky and left for
- * future work.
+/* A block contains at most one PC-relative constant, from a terminal branch.
+ * Find the last instruction and if it is a relative branch, fix up the
+ * PC-relative constant to contain the absolute offset. This occurs at pack
+ * time instead of schedule time because the number of quadwords between each
+ * block is not known until after all other passes have finished.
  */
 
-static unsigned
-bi_pack_constants(bi_context *ctx, bi_clause *clause,
-                unsigned word_idx, bool ec0_packed,
+static void
+bi_assign_branch_offset(bi_context *ctx, bi_block *block)
+{
+        if (list_is_empty(&block->clauses))
+                return;
+
+        bi_clause *clause = list_last_entry(&block->clauses, bi_clause, link);
+        bi_instr *br = bi_last_instr_in_clause(clause);
+
+        if (!br->branch_target)
+                return;
+
+        /* Put it in the high place */
+        int32_t qwords = bi_block_offset(ctx, clause, br->branch_target);
+        int32_t bytes = qwords * 16;
+
+        /* Copy so we can toy with the sign without undefined behaviour */
+        uint32_t raw = 0;
+        memcpy(&raw, &bytes, sizeof(raw));
+
+        /* Clear off top bits for A1/B1 bits */
+        raw &= ~0xF0000000;
+
+        /* Put in top 32-bits */
+        assert(clause->pcrel_idx < 8);
+        clause->constants[clause->pcrel_idx] |= ((uint64_t) raw) << 32ull;
+}
+
+static void
+bi_pack_constants(unsigned tuple_count, uint64_t *constants,
+                unsigned word_idx, unsigned constant_words, bool ec0_packed,
                 struct util_dynarray *emission)
 {
         unsigned index = (word_idx << 1) + ec0_packed;
 
-        /* After these two, are we done? Determines tag */
-        bool done = clause->constant_count <= (index + 2);
-
-        /* Is the constant we're packing for a branch? */
-        bool branches = clause->branch_constant && done;
+        /* Do more constants follow */
+        bool more = (word_idx + 1) < constant_words;
 
         /* Indexed first by tuple count and second by constant word number,
          * indicates the position in the clause */
-        unsigned pos[8][3] = {
+        unsigned pos_lookup[8][3] = {
                 { 0 },
                 { 1 },
                 { 3 },
@@ -549,57 +567,20 @@ bi_pack_constants(bi_context *ctx, bi_clause *clause,
                 { 9, 12 }
         };
 
-        /* Compute branch offset instead of a dummy 0 */
-        bool terminal_branch = true;
-
-        if (branches) {
-                bi_instr *br = clause->tuples[clause->tuple_count - 1].add;
-                assert(br && br->branch_target);
-
-                if (!bi_is_terminal_block(br->branch_target)) {
-                        /* Put it in the high place */
-                        int32_t qwords = bi_block_offset(ctx, clause, br->branch_target);
-                        int32_t bytes = qwords * 16;
-
-                        /* Copy so we get proper sign behaviour */
-                        uint32_t raw = 0;
-                        memcpy(&raw, &bytes, sizeof(raw));
-
-                        /* Clear off top bits for the magic bits */
-                        raw &= ~0xF0000000;
-                        terminal_branch = false;
-
-                        /* Put in top 32-bits */
-                        clause->constants[index + 0] = ((uint64_t) raw) << 32ull;
-               }
-        }
-
-        uint64_t hi = clause->constants[index + 0] >> 60ull;
+        /* Compute the pos, and check everything is reasonable */
+        assert((tuple_count - 1) < 8);
+        assert(word_idx < 3);
+        unsigned pos = pos_lookup[tuple_count - 1][word_idx];
+        assert(pos != 0 || (tuple_count == 1 && word_idx == 0));
 
         struct bifrost_fmt_constant quad = {
-                .pos = pos[clause->tuple_count - 1][word_idx], /* TODO */
-                .tag = done ? BIFROST_FMTC_FINAL : BIFROST_FMTC_CONSTANTS,
-                .imm_1 = clause->constants[index + 0] >> 4,
-                .imm_2 = ((hi < 8) ? (hi << 60ull) : 0) >> 4,
+                .pos = pos,
+                .tag = more ? BIFROST_FMTC_CONSTANTS : BIFROST_FMTC_FINAL,
+                .imm_1 = constants[index + 0] >> 4,
+                .imm_2 = constants[index + 1] >> 4,
         };
 
-        if (branches && !terminal_branch) {
-                /* Branch offsets are less than 60-bits so this should work at
-                 * least for now */
-                quad.imm_1 |= (4ull << 60ull) >> 4;
-                assert (hi == 0);
-        }
-
-        /* XXX: On G71, Connor observed that the difference of the top 4 bits
-         * of the second constant with the first must be less than 8, otherwise
-         * we have to swap them. On G52, I'm able to reproduce a similar issue
-         * but with a different workaround (modeled above with a single
-         * constant, unclear how to workaround for multiple constants.) Further
-         * investigation needed. Possibly an errata. XXX */
-
         util_dynarray_append(emission, struct bifrost_fmt_constant, quad);
-
-        return 2;
 }
 
 static inline uint8_t
@@ -800,9 +781,6 @@ bi_pack_clause(bi_context *ctx, bi_clause *clause,
                 struct util_dynarray *emission, gl_shader_stage stage,
                 bool tdd)
 {
-        /* TODO After the deadline lowering */
-        bi_lower_cubeface2(ctx, &clause->tuples[0]);
-
         struct bi_packed_tuple ins[8] = { 0 };
 
         for (unsigned i = 0; i < clause->tuple_count; ++i) {
@@ -857,8 +835,8 @@ bi_pack_clause(bi_context *ctx, bi_clause *clause,
         /* Pack the remaining constants */
 
         for (unsigned pos = 0; pos < constant_quads; ++pos) {
-                bi_pack_constants(ctx, clause, pos, ec0_packed,
-                                emission);
+                bi_pack_constants(clause->tuple_count, clause->constants,
+                                pos, constant_quads, ec0_packed, emission);
         }
 }
 
@@ -909,6 +887,8 @@ bi_pack(bi_context *ctx, struct util_dynarray *emission)
         bi_foreach_block(ctx, _block) {
                 bi_block *block = (bi_block *) _block;
 
+                bi_assign_branch_offset(ctx, block);
+
                 /* Passthrough the first clause of where we're branching to for
                  * the last clause of the block (the clause with the branch) */
 
index ed2b1c3de7113336546928faa82192a436dc4774..a4f1fe81444cd02be8b5996bbc3f6328ef76c9b3 100644 (file)
@@ -266,11 +266,14 @@ bi_spill_dest(bi_builder *b, bi_index index, bi_index temp, uint32_t offset,
 {
         b->cursor = bi_after_clause(clause);
 
-        bi_instr *st = bi_store_to(b, channels * 32, bi_null(),
-                        temp, bi_imm_u32(offset), bi_zero(), BI_SEG_TL);
+        /* setup FAU as [offset][0] */
+        bi_instr *st = bi_store_to(b, channels * 32, bi_null(), temp,
+                        bi_passthrough(BIFROST_SRC_FAU_LO),
+                        bi_passthrough(BIFROST_SRC_FAU_HI),
+                        BI_SEG_TL);
 
         bi_clause *singleton = bi_singleton(b->shader, st, block, 0, (1 << 0),
-                        true);
+                        offset, true);
 
         list_add(&singleton->link, &clause->link);
         b->shader->spills++;
@@ -281,12 +284,14 @@ bi_fill_src(bi_builder *b, bi_index index, bi_index temp, uint32_t offset,
                 bi_clause *clause, bi_block *block, unsigned channels)
 {
         b->cursor = bi_before_clause(clause);
-        bi_instr *ld = bi_load_to(b, channels * 32, temp, bi_imm_u32(offset),
-                        bi_zero(), BI_SEG_TL);
+        bi_instr *ld = bi_load_to(b, channels * 32, temp,
+                        bi_passthrough(BIFROST_SRC_FAU_LO),
+                        bi_passthrough(BIFROST_SRC_FAU_HI),
+                        BI_SEG_TL);
         ld->no_spill = true;
 
         bi_clause *singleton = bi_singleton(b->shader, ld, block, 0,
-                        (1 << 0), true);
+                        (1 << 0), offset, true);
 
         list_addtail(&singleton->link, &clause->link);
         b->shader->fills++;
index 29efa23c228d48a7482c846c01f9e67a89026bef..46e31646134c815b20861c1e2ecd14480724aca7 100644 (file)
@@ -239,6 +239,7 @@ bi_singleton(void *memctx, bi_instr *ins,
                 bi_block *block,
                 unsigned scoreboard_id,
                 unsigned dependencies,
+                uint64_t combined_constant,
                 bool osrb)
 {
         bi_clause *u = rzalloc(memctx, bi_clause);
@@ -266,42 +267,14 @@ bi_singleton(void *memctx, bi_instr *ins,
         /* Let's be optimistic, we'll fix up later */
         u->flow_control = BIFROST_FLOW_NBTB;
 
-        /* Build up a combined constant, count in 32-bit words */
-        uint64_t combined_constant = 0;
-        unsigned constant_count = 0;
+        assert(!ins->branch_target);
 
-        bi_foreach_src(ins, s) {
-                if (ins->src[s].type != BI_INDEX_CONSTANT) continue;
-                unsigned value = ins->src[s].value;
-
-                /* Allow fast zero */
-                if (value == 0 && u->tuples[0].fma) continue;
-
-                if (constant_count == 0) {
-                        combined_constant = ins->src[s].value;
-                } else if (constant_count == 1) {
-                        /* Allow reuse */
-                        if (combined_constant == value)
-                                continue;
-
-                        combined_constant |= ((uint64_t) value) << 32ull;
-                } else {
-                        /* No more room! */
-                        assert((combined_constant & 0xffffffff) == value ||
-                                        (combined_constant >> 32ull) == value);
-                }
-
-                constant_count++;
-        }
-
-        if (ins->branch_target)
-                u->branch_constant = true;
-
-        /* XXX: Investigate errors when constants are not used */
-        if (constant_count || u->branch_constant || true) {
+        if (combined_constant) {
                 /* Clause in 64-bit, above in 32-bit */
                 u->constant_count = 1;
                 u->constants[0] = combined_constant;
+                u->tuples[0].fau_idx = bi_constant_field(0) |
+                        (combined_constant & 0xF);
         }
 
         u->next_clause_prefetch = (ins->op != BI_OPCODE_JUMP);
@@ -414,44 +387,6 @@ bi_reads_t(bi_instr *ins, unsigned src)
         }
 }
 
-/* Eventually, we'll need a proper scheduling, grouping instructions
- * into clauses and ordering/assigning grouped instructions to the
- * appropriate FMA/ADD slots. Right now we do the dumbest possible
- * thing just to have the scheduler stubbed out so we can focus on
- * codegen */
-
-void
-bi_schedule(bi_context *ctx)
-{
-        bool is_first = true;
-
-        bi_foreach_block(ctx, block) {
-                bi_block *bblock = (bi_block *) block;
-
-                list_inithead(&bblock->clauses);
-
-                bi_foreach_instr_in_block(bblock, ins) {
-                        bi_clause *u = bi_singleton(ctx, ins,
-                                        bblock, 0, (1 << 0),
-                                        !is_first);
-
-                        is_first = false;
-                        list_addtail(&u->link, &bblock->clauses);
-                }
-
-                /* Back-to-back bit affects only the last clause of a block,
-                 * the rest are implicitly true */
-
-                if (!list_is_empty(&bblock->clauses)) {
-                        bi_clause *last_clause = list_last_entry(&bblock->clauses, bi_clause, link);
-                        if (!bi_back_to_back(bblock))
-                                last_clause->flow_control = BIFROST_FLOW_NBTB_UNCONDITIONAL;
-                }
-
-                bblock->scheduled = true;
-        }
-}
-
 /* Counts the number of 64-bit constants required by a clause. TODO: We
  * might want to account for merging, right now we overestimate, but
  * that's probably fine most of the time */
@@ -1427,6 +1362,16 @@ bi_schedule_block(bi_context *ctx, bi_block *block)
         bi_free_worklist(st);
 }
 
+void
+bi_schedule(bi_context *ctx)
+{
+        bi_foreach_block(ctx, block) {
+                bi_block *bblock = (bi_block *) block;
+                bi_schedule_block(ctx, bblock);
+                bi_opt_dead_code_eliminate(ctx, bblock, true);
+        }
+}
+
 #ifndef NDEBUG
 
 static bi_builder *
index 19830549472729a793fbf47961b88559b199b4ed..0a8f299f9835c874aef83f3ef60939f203273a94 100644 (file)
@@ -1669,22 +1669,17 @@ bi_emit_cube_coord(bi_builder *b, bi_index coord,
                     bi_index *face, bi_index *s, bi_index *t)
 {
         /* Compute max { |x|, |y|, |z| } */
-        bi_index cubeface1 = bi_cubeface1(b, coord,
+        bi_instr *cubeface = bi_cubeface_to(b, bi_temp(b->shader), coord,
                         bi_word(coord, 1), bi_word(coord, 2));
-
-        /* Calculate packed exponent / face / infinity. In reality this reads
-         * the destination from cubeface1 but that's handled by lowering */
-        bi_instr *cubeface2 = bi_cubeface1_to(b, bi_temp(b->shader), coord,
-                        bi_word(coord, 1), bi_word(coord, 2));
-        cubeface2->op = BI_OPCODE_CUBEFACE2; /* XXX: DEEP VOODOO */
+        cubeface->dest[1] = bi_temp(b->shader);
 
         /* Select coordinates */
 
         bi_index ssel = bi_cube_ssel(b, bi_word(coord, 2), coord,
-                        cubeface2->dest[0]);
+                        cubeface->dest[1]);
 
         bi_index tsel = bi_cube_tsel(b, bi_word(coord, 1), bi_word(coord, 2),
-                        cubeface2->dest[0]);
+                        cubeface->dest[1]);
 
         /* The OpenGL ES specification requires us to transform an input vector
          * (x, y, z) to the coordinate, given the selected S/T:
@@ -1700,7 +1695,7 @@ bi_emit_cube_coord(bi_builder *b, bi_index coord,
          * Take the reciprocal of max{x, y, z}
          */
 
-        bi_index rcp = bi_frcp_f32(b, cubeface1);
+        bi_index rcp = bi_frcp_f32(b, cubeface->dest[0]);
 
         /* Calculate 0.5 * (1.0 / max{x, y, z}) */
         bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_zero(),
@@ -1722,7 +1717,7 @@ bi_emit_cube_coord(bi_builder *b, bi_index coord,
          * because the TEXS_CUBE and TEXC instructions expect the face index to
          * be at this position.
          */
-        *face = cubeface2->dest[0];
+        *face = cubeface->dest[1];
 }
 
 /* Emits a cube map descriptor, returning lower 32-bits and putting upper
index 138d49d6f497ddc8ce8de2a9f918f5b170454103..346ac86a72bfaf6f05f8ab2d1b90b65064898ef4 100644 (file)
@@ -743,6 +743,7 @@ bi_singleton(void *memctx, bi_instr *ins,
                 bi_block *block,
                 unsigned scoreboard_id,
                 unsigned dependencies,
+                uint64_t combined_constant,
                 bool osrb);
 
 /* Liveness */