Delete the old scheduler.
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8354>
bi_pack_tuple(bi_clause *clause, bi_tuple *tuple, bi_tuple *prev, bool first_tuple, gl_shader_stage stage)
{
bi_assign_slots(tuple, prev);
- bi_assign_fau_idx(clause, tuple);
+ tuple->regs.fau_idx = tuple->fau_idx;
tuple->regs.first_instruction = first_tuple;
bi_flip_slots(&tuple->regs);
return packed;
}
-/* Packs the next two constants as a dedicated constant quadword at the end of
- * the clause, returning the number packed. There are two cases to consider:
- *
- * Case #1: Branching is not used. For a single constant copy the upper nibble
- * over, easy.
- *
- * Case #2: Branching is used. For a single constant, it suffices to set the
- * upper nibble to 4 and leave the latter constant 0, which matches what the
- * blob does.
- *
- * Extending to multiple constants is considerably more tricky and left for
- * future work.
+/* A block contains at most one PC-relative constant, from a terminal branch.
+ * Find the last instruction and if it is a relative branch, fix up the
+ * PC-relative constant to contain the absolute offset. This occurs at pack
+ * time instead of schedule time because the number of quadwords between each
+ * block is not known until after all other passes have finished.
*/
-static unsigned
-bi_pack_constants(bi_context *ctx, bi_clause *clause,
- unsigned word_idx, bool ec0_packed,
+static void
+bi_assign_branch_offset(bi_context *ctx, bi_block *block)
+{
+ if (list_is_empty(&block->clauses))
+ return;
+
+ bi_clause *clause = list_last_entry(&block->clauses, bi_clause, link);
+ bi_instr *br = bi_last_instr_in_clause(clause);
+
+ if (!br->branch_target)
+ return;
+
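+ /* Offset to the branch target in quadwords, converted to bytes (16 per
+ * quadword). It is placed in the upper 32 bits of the constant below. */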
+ int32_t qwords = bi_block_offset(ctx, clause, br->branch_target);
+ int32_t bytes = qwords * 16;
+
+ /* Copy so we can toy with the sign without undefined behaviour */
+ uint32_t raw = 0;
+ memcpy(&raw, &bytes, sizeof(raw));
+
+ /* Clear the top 4 bits to make room for the A1/B1 bits */
+ raw &= ~0xF0000000;
+
+ /* OR the offset into the upper 32 bits of the PC-relative constant */
+ assert(clause->pcrel_idx < 8);
+ clause->constants[clause->pcrel_idx] |= ((uint64_t) raw) << 32ull;
+}
+
+static void
+bi_pack_constants(unsigned tuple_count, uint64_t *constants,
+ unsigned word_idx, unsigned constant_words, bool ec0_packed,
struct util_dynarray *emission)
{
unsigned index = (word_idx << 1) + ec0_packed;
- /* After these two, are we done? Determines tag */
- bool done = clause->constant_count <= (index + 2);
-
- /* Is the constant we're packing for a branch? */
- bool branches = clause->branch_constant && done;
+ /* Do more constant words follow this one? Determines the tag */
+ bool more = (word_idx + 1) < constant_words;
/* Indexed first by tuple count and second by constant word number,
* indicates the position in the clause */
- unsigned pos[8][3] = {
+ unsigned pos_lookup[8][3] = {
{ 0 },
{ 1 },
{ 3 },
{ 9, 12 }
};
- /* Compute branch offset instead of a dummy 0 */
- bool terminal_branch = true;
-
- if (branches) {
- bi_instr *br = clause->tuples[clause->tuple_count - 1].add;
- assert(br && br->branch_target);
-
- if (!bi_is_terminal_block(br->branch_target)) {
- /* Put it in the high place */
- int32_t qwords = bi_block_offset(ctx, clause, br->branch_target);
- int32_t bytes = qwords * 16;
-
- /* Copy so we get proper sign behaviour */
- uint32_t raw = 0;
- memcpy(&raw, &bytes, sizeof(raw));
-
- /* Clear off top bits for the magic bits */
- raw &= ~0xF0000000;
- terminal_branch = false;
-
- /* Put in top 32-bits */
- clause->constants[index + 0] = ((uint64_t) raw) << 32ull;
- }
- }
-
- uint64_t hi = clause->constants[index + 0] >> 60ull;
+ /* Look up the position for this tuple count and constant word, checking
+ * that both indices are in range and that the table has an entry */
+ assert((tuple_count - 1) < 8);
+ assert(word_idx < 3);
+ unsigned pos = pos_lookup[tuple_count - 1][word_idx];
+ assert(pos != 0 || (tuple_count == 1 && word_idx == 0));
struct bifrost_fmt_constant quad = {
- .pos = pos[clause->tuple_count - 1][word_idx], /* TODO */
- .tag = done ? BIFROST_FMTC_FINAL : BIFROST_FMTC_CONSTANTS,
- .imm_1 = clause->constants[index + 0] >> 4,
- .imm_2 = ((hi < 8) ? (hi << 60ull) : 0) >> 4,
+ .pos = pos,
+ .tag = more ? BIFROST_FMTC_CONSTANTS : BIFROST_FMTC_FINAL,
+ .imm_1 = constants[index + 0] >> 4,
+ .imm_2 = constants[index + 1] >> 4,
};
- if (branches && !terminal_branch) {
- /* Branch offsets are less than 60-bits so this should work at
- * least for now */
- quad.imm_1 |= (4ull << 60ull) >> 4;
- assert (hi == 0);
- }
-
- /* XXX: On G71, Connor observed that the difference of the top 4 bits
- * of the second constant with the first must be less than 8, otherwise
- * we have to swap them. On G52, I'm able to reproduce a similar issue
- * but with a different workaround (modeled above with a single
- * constant, unclear how to workaround for multiple constants.) Further
- * investigation needed. Possibly an errata. XXX */
-
util_dynarray_append(emission, struct bifrost_fmt_constant, quad);
-
- return 2;
}
static inline uint8_t
struct util_dynarray *emission, gl_shader_stage stage,
bool tdd)
{
- /* TODO After the deadline lowering */
- bi_lower_cubeface2(ctx, &clause->tuples[0]);
-
struct bi_packed_tuple ins[8] = { 0 };
for (unsigned i = 0; i < clause->tuple_count; ++i) {
/* Pack the remaining constants */
for (unsigned pos = 0; pos < constant_quads; ++pos) {
- bi_pack_constants(ctx, clause, pos, ec0_packed,
- emission);
+ bi_pack_constants(clause->tuple_count, clause->constants,
+ pos, constant_quads, ec0_packed, emission);
}
}
bi_foreach_block(ctx, _block) {
bi_block *block = (bi_block *) _block;
+ bi_assign_branch_offset(ctx, block);
+
/* Passthrough the first clause of where we're branching to for
* the last clause of the block (the clause with the branch) */
{
b->cursor = bi_after_clause(clause);
- bi_instr *st = bi_store_to(b, channels * 32, bi_null(),
- temp, bi_imm_u32(offset), bi_zero(), BI_SEG_TL);
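+ /* Set up FAU as [offset][0]: the offset is passed to bi_singleton as the
+ * clause constant and read back through the FAU_LO/FAU_HI passthroughs */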
+ bi_instr *st = bi_store_to(b, channels * 32, bi_null(), temp,
+ bi_passthrough(BIFROST_SRC_FAU_LO),
+ bi_passthrough(BIFROST_SRC_FAU_HI),
+ BI_SEG_TL);
bi_clause *singleton = bi_singleton(b->shader, st, block, 0, (1 << 0),
- true);
+ offset, true);
list_add(&singleton->link, &clause->link);
b->shader->spills++;
bi_clause *clause, bi_block *block, unsigned channels)
{
b->cursor = bi_before_clause(clause);
- bi_instr *ld = bi_load_to(b, channels * 32, temp, bi_imm_u32(offset),
- bi_zero(), BI_SEG_TL);
+ bi_instr *ld = bi_load_to(b, channels * 32, temp,
+ bi_passthrough(BIFROST_SRC_FAU_LO),
+ bi_passthrough(BIFROST_SRC_FAU_HI),
+ BI_SEG_TL);
ld->no_spill = true;
bi_clause *singleton = bi_singleton(b->shader, ld, block, 0,
- (1 << 0), true);
+ (1 << 0), offset, true);
list_addtail(&singleton->link, &clause->link);
b->shader->fills++;
bi_block *block,
unsigned scoreboard_id,
unsigned dependencies,
+ uint64_t combined_constant,
bool osrb)
{
bi_clause *u = rzalloc(memctx, bi_clause);
/* Let's be optimistic, we'll fix up later */
u->flow_control = BIFROST_FLOW_NBTB;
- /* Build up a combined constant, count in 32-bit words */
- uint64_t combined_constant = 0;
- unsigned constant_count = 0;
+ assert(!ins->branch_target);
- bi_foreach_src(ins, s) {
- if (ins->src[s].type != BI_INDEX_CONSTANT) continue;
- unsigned value = ins->src[s].value;
-
- /* Allow fast zero */
- if (value == 0 && u->tuples[0].fma) continue;
-
- if (constant_count == 0) {
- combined_constant = ins->src[s].value;
- } else if (constant_count == 1) {
- /* Allow reuse */
- if (combined_constant == value)
- continue;
-
- combined_constant |= ((uint64_t) value) << 32ull;
- } else {
- /* No more room! */
- assert((combined_constant & 0xffffffff) == value ||
- (combined_constant >> 32ull) == value);
- }
-
- constant_count++;
- }
-
- if (ins->branch_target)
- u->branch_constant = true;
-
- /* XXX: Investigate errors when constants are not used */
- if (constant_count || u->branch_constant || true) {
+ if (combined_constant) {
/* Clause in 64-bit, above in 32-bit */
u->constant_count = 1;
u->constants[0] = combined_constant;
+ u->tuples[0].fau_idx = bi_constant_field(0) |
+ (combined_constant & 0xF);
}
u->next_clause_prefetch = (ins->op != BI_OPCODE_JUMP);
}
}
-/* Eventually, we'll need a proper scheduling, grouping instructions
- * into clauses and ordering/assigning grouped instructions to the
- * appropriate FMA/ADD slots. Right now we do the dumbest possible
- * thing just to have the scheduler stubbed out so we can focus on
- * codegen */
-
-void
-bi_schedule(bi_context *ctx)
-{
- bool is_first = true;
-
- bi_foreach_block(ctx, block) {
- bi_block *bblock = (bi_block *) block;
-
- list_inithead(&bblock->clauses);
-
- bi_foreach_instr_in_block(bblock, ins) {
- bi_clause *u = bi_singleton(ctx, ins,
- bblock, 0, (1 << 0),
- !is_first);
-
- is_first = false;
- list_addtail(&u->link, &bblock->clauses);
- }
-
- /* Back-to-back bit affects only the last clause of a block,
- * the rest are implicitly true */
-
- if (!list_is_empty(&bblock->clauses)) {
- bi_clause *last_clause = list_last_entry(&bblock->clauses, bi_clause, link);
- if (!bi_back_to_back(bblock))
- last_clause->flow_control = BIFROST_FLOW_NBTB_UNCONDITIONAL;
- }
-
- bblock->scheduled = true;
- }
-}
-
/* Counts the number of 64-bit constants required by a clause. TODO: We
* might want to account for merging, right now we overestimate, but
* that's probably fine most of the time */
bi_free_worklist(st);
}
+void
+bi_schedule(bi_context *ctx)
+{
+ bi_foreach_block(ctx, block) {
+ bi_block *bblock = (bi_block *) block;
+ bi_schedule_block(ctx, bblock);
+ bi_opt_dead_code_eliminate(ctx, bblock, true);
+ }
+}
+
#ifndef NDEBUG
static bi_builder *
bi_index *face, bi_index *s, bi_index *t)
{
/* Compute max { |x|, |y|, |z| } */
- bi_index cubeface1 = bi_cubeface1(b, coord,
+ bi_instr *cubeface = bi_cubeface_to(b, bi_temp(b->shader), coord,
bi_word(coord, 1), bi_word(coord, 2));
-
- /* Calculate packed exponent / face / infinity. In reality this reads
- * the destination from cubeface1 but that's handled by lowering */
- bi_instr *cubeface2 = bi_cubeface1_to(b, bi_temp(b->shader), coord,
- bi_word(coord, 1), bi_word(coord, 2));
- cubeface2->op = BI_OPCODE_CUBEFACE2; /* XXX: DEEP VOODOO */
+ cubeface->dest[1] = bi_temp(b->shader);
/* Select coordinates */
bi_index ssel = bi_cube_ssel(b, bi_word(coord, 2), coord,
- cubeface2->dest[0]);
+ cubeface->dest[1]);
bi_index tsel = bi_cube_tsel(b, bi_word(coord, 1), bi_word(coord, 2),
- cubeface2->dest[0]);
+ cubeface->dest[1]);
/* The OpenGL ES specification requires us to transform an input vector
* (x, y, z) to the coordinate, given the selected S/T:
* Take the reciprocal of max{x, y, z}
*/
- bi_index rcp = bi_frcp_f32(b, cubeface1);
+ bi_index rcp = bi_frcp_f32(b, cubeface->dest[0]);
/* Calculate 0.5 * (1.0 / max{x, y, z}) */
bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_zero(),
* because the TEXS_CUBE and TEXC instructions expect the face index to
* be at this position.
*/
- *face = cubeface2->dest[0];
+ *face = cubeface->dest[1];
}
/* Emits a cube map descriptor, returning lower 32-bits and putting upper
bi_block *block,
unsigned scoreboard_id,
unsigned dependencies,
+ uint64_t combined_constant,
bool osrb);
/* Liveness */