From a8fbfcfbd3ff2c828f99ffc57f9dd4acc340d902 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 4 Jun 2021 14:44:56 +0200 Subject: [PATCH] pan/midg: Support 8/16 bit load/store Needed for panvk copy shaders to support 8 or 16bit formats. Signed-off-by: Boris Brezillon Part-of: --- src/panfrost/midgard/midgard_compile.c | 64 ++++++++++++++++---- src/panfrost/midgard/midgard_emit.c | 103 +++++++++++++++------------------ src/panfrost/midgard/midgard_opt_dce.c | 2 +- src/panfrost/midgard/midgard_ra.c | 2 +- 4 files changed, 101 insertions(+), 70 deletions(-) diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 7e979df..f63e0ae 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -134,9 +134,13 @@ M_LOAD(ld_vary_32, nir_type_uint32); M_LOAD(ld_ubo_32, nir_type_uint32); M_LOAD(ld_ubo_64, nir_type_uint32); M_LOAD(ld_ubo_128, nir_type_uint32); +M_LOAD(ld_u8, nir_type_uint8); +M_LOAD(ld_u16, nir_type_uint16); M_LOAD(ld_32, nir_type_uint32); M_LOAD(ld_64, nir_type_uint32); M_LOAD(ld_128, nir_type_uint32); +M_STORE(st_u8, nir_type_uint8); +M_STORE(st_u16, nir_type_uint16); M_STORE(st_32, nir_type_uint32); M_STORE(st_64, nir_type_uint32); M_STORE(st_128, nir_type_uint32); @@ -1198,7 +1202,8 @@ mir_set_intr_mask(nir_instr *instr, midgard_instruction *ins, bool is_read) dsize = nir_dest_bit_size(intr->dest); } else { nir_mask = nir_intrinsic_write_mask(intr); - dsize = 32; + dsize = OP_IS_COMMON_STORE(ins->op) ? 
+ nir_src_bit_size(intr->src[0]) : 32; } /* Once we have the NIR mask, we need to normalize to work in 32-bit space */ @@ -1283,19 +1288,55 @@ emit_global( unsigned bitsize = nir_dest_bit_size(intr->dest) * nir_dest_num_components(intr->dest); - if (bitsize <= 32) - ins = m_ld_32(srcdest, 0); - else if (bitsize <= 64) - ins = m_ld_64(srcdest, 0); - else if (bitsize <= 128) - ins = m_ld_128(srcdest, 0); - else - unreachable("Invalid global read size"); + switch (bitsize) { + case 8: ins = m_ld_u8(srcdest, 0); break; + case 16: ins = m_ld_u16(srcdest, 0); break; + case 32: ins = m_ld_32(srcdest, 0); break; + case 64: ins = m_ld_64(srcdest, 0); break; + case 128: ins = m_ld_128(srcdest, 0); break; + default: unreachable("Invalid global read size"); + } + + mir_set_intr_mask(instr, &ins, is_read); + + /* For anything not aligned on 32bit, make sure we write full + * 32 bits registers. */ + if (bitsize & 31) { + unsigned comps_per_32b = 32 / nir_dest_bit_size(intr->dest); + + for (unsigned c = 0; c < 4 * comps_per_32b; c += comps_per_32b) { + if (!(ins.mask & BITFIELD_RANGE(c, comps_per_32b))) + continue; + + unsigned base = ~0; + for (unsigned i = 0; i < comps_per_32b; i++) { + if (ins.mask & BITFIELD_BIT(c + i)) { + base = ins.swizzle[0][c + i]; + break; + } + } + + assert(base != ~0); + + for (unsigned i = 0; i < comps_per_32b; i++) { + if (!(ins.mask & BITFIELD_BIT(c + i))) { + ins.swizzle[0][c + i] = base + i; + ins.mask |= BITFIELD_BIT(c + i); + } + assert(ins.swizzle[0][c + i] == base + i); + } + } + + } } else { unsigned bitsize = nir_src_bit_size(intr->src[0]) * nir_src_num_components(intr->src[0]); - if (bitsize <= 32) + if (bitsize == 8) + ins = m_st_u8(srcdest, 0); + else if (bitsize == 16) + ins = m_st_u16(srcdest, 0); + else if (bitsize <= 32) ins = m_st_32(srcdest, 0); else if (bitsize <= 64) ins = m_st_64(srcdest, 0); @@ -1303,10 +1344,11 @@ emit_global( ins = m_st_128(srcdest, 0); else unreachable("Invalid global store size"); + + 
mir_set_intr_mask(instr, &ins, is_read); } mir_set_offset(ctx, &ins, offset, seg); - mir_set_intr_mask(instr, &ins, is_read); /* Set a valid swizzle for masked out components */ assert(ins.mask); diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c index 7c6fbd2..e613aa0 100644 --- a/src/panfrost/midgard/midgard_emit.c +++ b/src/panfrost/midgard/midgard_emit.c @@ -368,14 +368,24 @@ mir_pack_vector_srcs(midgard_instruction *ins, midgard_vector_alu *alu) static void mir_pack_swizzle_ldst(midgard_instruction *ins) { - /* TODO: non-32-bit, non-vec4 */ - for (unsigned c = 0; c < 4; ++c) { + unsigned compsz = OP_IS_STORE(ins->op) ? + nir_alu_type_get_type_size(ins->src_types[0]) : + nir_alu_type_get_type_size(ins->dest_type); + unsigned maxcomps = 128 / compsz; + unsigned step = DIV_ROUND_UP(32, compsz); + + for (unsigned c = 0; c < maxcomps; c += step) { unsigned v = ins->swizzle[0][c]; - /* Check vec4 */ - assert(v <= 3); + /* Make sure the component index doesn't exceed the maximum + * number of components. 
*/ + assert(v < maxcomps); - ins->load_store.swizzle |= v << (2 * c); + if (compsz <= 32) + ins->load_store.swizzle |= (v / step) << (2 * (c / step)); + else + ins->load_store.swizzle |= ((v / step) << (4 * c)) | + (((v / step) + 1) << ((4 * c) + 2)); } /* TODO: arg_1/2 */ @@ -458,57 +468,34 @@ mir_pack_tex_ooo(midgard_block *block, midgard_bundle *bundle, midgard_instructi static unsigned midgard_pack_common_store_mask(midgard_instruction *ins) { - unsigned comp_sz = nir_alu_type_get_type_size(ins->dest_type); - unsigned mask = ins->mask; + ASSERTED unsigned comp_sz = nir_alu_type_get_type_size(ins->src_types[0]); + unsigned bytemask = mir_bytemask(ins); unsigned packed = 0; - unsigned nr_comp; switch (ins->op) { - case midgard_op_st_u8: - packed |= mask & 1; - break; - case midgard_op_st_u16: - nr_comp = 16 / comp_sz; - for (int i = 0; i < nr_comp; i++) { - if (mask & (1 << i)) { - if (comp_sz == 16) - packed |= 0x3; - else if (comp_sz == 8) - packed |= 1 << i; - } - } - break; - case midgard_op_st_32: - case midgard_op_st_64: - case midgard_op_st_128: { - unsigned total_sz = 32; - if (ins->op == midgard_op_st_128) - total_sz = 128; - else if (ins->op == midgard_op_st_64) - total_sz = 64; - - nr_comp = total_sz / comp_sz; - - /* Each writemask bit masks 1/4th of the value to be stored. 
*/ - assert(comp_sz >= total_sz / 4); - - for (int i = 0; i < nr_comp; i++) { - if (mask & (1 << i)) { - if (comp_sz == total_sz) - packed |= 0xF; - else if (comp_sz == total_sz / 2) - packed |= 0x3 << (i * 2); - else if (comp_sz == total_sz / 4) - packed |= 0x1 << i; - } - } - break; + case midgard_op_st_u8: + return mir_bytemask(ins) & 1; + case midgard_op_st_u16: + return mir_bytemask(ins) & 3; + case midgard_op_st_32: + return mir_bytemask(ins); + case midgard_op_st_64: + assert(comp_sz >= 16); + for (unsigned i = 0; i < 4; i++) { + if (bytemask & (3 << (i * 2))) + packed |= 1 << i; + } + return packed; + case midgard_op_st_128: + assert(comp_sz >= 32); + for (unsigned i = 0; i < 4; i++) { + if (bytemask & (0xf << (i * 4))) + packed |= 1 << i; } - default: - unreachable("unexpected ldst opcode"); + return packed; + default: + unreachable("unexpected ldst opcode"); } - - return packed; } static void @@ -523,16 +510,18 @@ mir_pack_ldst_mask(midgard_instruction *ins) if (sz == 64) { packed = ((ins->mask & 0x2) ? (0x8 | 0x4) : 0) | ((ins->mask & 0x1) ? 
(0x2 | 0x1) : 0); - } else if (sz == 16) { + } else if (sz < 32) { + unsigned comps_per_32b = 32 / sz; + packed = 0; for (unsigned i = 0; i < 4; ++i) { - /* Make sure we're duplicated */ - bool u = (ins->mask & (1 << (2*i + 0))) != 0; - ASSERTED bool v = (ins->mask & (1 << (2*i + 1))) != 0; - assert(u == v); + unsigned submask = (ins->mask >> (i * comps_per_32b)) & + BITFIELD_MASK(comps_per_32b); - packed |= (u << i); + /* Make sure we're duplicated */ + assert(submask == 0 || submask == BITFIELD_MASK(comps_per_32b)); + packed |= (submask != 0) << i; } } else { assert(sz == 32); diff --git a/src/panfrost/midgard/midgard_opt_dce.c b/src/panfrost/midgard/midgard_opt_dce.c index c43c4eb..f08972f 100644 --- a/src/panfrost/midgard/midgard_opt_dce.c +++ b/src/panfrost/midgard/midgard_opt_dce.c @@ -80,7 +80,7 @@ midgard_opt_dead_code_eliminate_block(compiler_context *ctx, midgard_block *bloc unsigned oldmask = ins->mask; /* Make sure we're packable */ - if (type_size == 16 && ins->type == TAG_LOAD_STORE_4) + if (type_size < 32 && ins->type == TAG_LOAD_STORE_4) round_size = 32; unsigned rounded = mir_round_bytemask_up(live[ins->dest], round_size); diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 40605df..af208bb 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -781,7 +781,7 @@ install_registers_instr( struct phys_reg dst = index_to_reg(ctx, l, ins->dest, dest_shift); ins->dest = SSA_FIXED_REGISTER(dst.reg); - offset_swizzle(ins->swizzle[0], 0, 2, 2, dst.offset); + offset_swizzle(ins->swizzle[0], 0, 2, dest_shift, dst.offset); mir_set_bytemask(ins, mir_bytemask(ins) << dst.offset); } -- 2.7.4