From: Hyunjun Ko Date: Tue, 9 Mar 2021 05:57:53 +0000 (+0000) Subject: ir3: Add nonuniform encodings to ir3 encoder and parser X-Git-Tag: upstream/21.2.3~6435 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e9fd2a2a584d6ec67a92f24c11a969c31d635dc9;p=platform%2Fupstream%2Fmesa.git ir3: Add nonuniform encodings to ir3 encoder and parser This is done by keeping track of nonuniform access from NIR and storing it in ir3. Signed-off-by: Hyunjun Ko Reviewed-by: Danylo Piliaiev Reviewed-by: Eric Anholt Part-of: --- diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index d4ccbf8..412a752 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -56,6 +56,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, ldib->barrier_class = IR3_BARRIER_BUFFER_R; ldib->barrier_conflict = IR3_BARRIER_BUFFER_W; ir3_handle_bindless_cat6(ldib, intr->src[0]); + ir3_handle_nonuniform(ldib, intr); ir3_split_dest(b, dst, ldib, 0, intr->num_components); } @@ -83,6 +84,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) stib->barrier_class = IR3_BARRIER_BUFFER_W; stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; ir3_handle_bindless_cat6(stib, intr->src[1]); + ir3_handle_nonuniform(stib, intr); array_insert(b, b->keeps, stib); } @@ -214,6 +216,7 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, ldib->barrier_class = IR3_BARRIER_IMAGE_R; ldib->barrier_conflict = IR3_BARRIER_IMAGE_W; ir3_handle_bindless_cat6(ldib, intr->src[0]); + ir3_handle_nonuniform(ldib, intr); ir3_split_dest(b, dst, ldib, 0, intr->num_components); } @@ -242,6 +245,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) stib->barrier_class = IR3_BARRIER_IMAGE_W; stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; ir3_handle_bindless_cat6(stib, intr->src[0]); + ir3_handle_nonuniform(stib, intr); array_insert(b, b->keeps, stib); } diff 
--git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 71f57cc..955c097 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -40,6 +40,15 @@ #include "ir3_context.h" void +ir3_handle_nonuniform(struct ir3_instruction *instr, nir_intrinsic_instr *intrin) +{ + if (nir_intrinsic_has_access(intrin) && + (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)) { + instr->flags |= IR3_INSTR_NONUNIF; + } +} + +void ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc) { nir_intrinsic_instr *intrin = ir3_bindless_resource(rsrc); @@ -741,6 +750,7 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr, ir3_handle_bindless_cat6(ldc, intr->src[0]); if (ldc->flags & IR3_INSTR_B) ctx->so->bindless_ubo = true; + ir3_handle_nonuniform(ldc, intr); ir3_split_dest(b, dst, ldc, 0, ncomp); } @@ -1233,6 +1243,8 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, sam = emit_sam(ctx, OPC_ISAM, info, type, 0b1111, ir3_create_collect(ctx, coords, ncoords), NULL); + ir3_handle_nonuniform(sam, intr); + sam->barrier_class = IR3_BARRIER_IMAGE_R; sam->barrier_conflict = IR3_BARRIER_IMAGE_W; @@ -2093,6 +2105,9 @@ get_tex_samp_tex_src(struct ir3_context *ctx, nir_tex_instr *tex) /* Bindless case */ info.flags |= IR3_INSTR_B; + if (tex->texture_non_uniform || tex->sampler_non_uniform) + info.flags |= IR3_INSTR_NONUNIF; + /* Gather information required to determine which encoding to * choose as well as for prefetch. 
*/ diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h index b26159e..2a0066e 100644 --- a/src/freedreno/ir3/ir3_context.h +++ b/src/freedreno/ir3/ir3_context.h @@ -183,6 +183,7 @@ struct ir3_instruction * ir3_create_collect(struct ir3_context *ctx, void ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst, struct ir3_instruction *src, unsigned base, unsigned n); void ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc); +void ir3_handle_nonuniform(struct ir3_instruction *instr, nir_intrinsic_instr *intrin); void emit_intrinsic_image_size_tex(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst); diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y index 8aad0c6..c379daf 100644 --- a/src/freedreno/ir3/ir3_parser.y +++ b/src/freedreno/ir3/ir3_parser.y @@ -181,7 +181,12 @@ static void fixup_cat5_s2en(void) * fix things up. */ struct ir3_register *s2en_src = instr->regs[instr->regs_count - 1]; - assert(s2en_src->flags & IR3_REG_HALF); + + if (instr->flags & IR3_INSTR_B) + assert(!(s2en_src->flags & IR3_REG_HALF)); + else + assert(s2en_src->flags & IR3_REG_HALF); + for (int i = 1; i < instr->regs_count - 1; i++) { instr->regs[i+1] = instr->regs[i]; } @@ -919,6 +924,7 @@ cat5_flag: '.' T_3D { instr->flags |= IR3_INSTR_3D; } | '.' 'p' { instr->flags |= IR3_INSTR_P; } | '.' 's' { instr->flags |= IR3_INSTR_S; } | '.' T_S2EN { instr->flags |= IR3_INSTR_S2EN; } +| '.' T_NONUNIFORM { instr->flags |= IR3_INSTR_NONUNIF; } | '.' T_BASE { instr->flags |= IR3_INSTR_B; instr->cat5.tex_base = $2; } cat5_flags: | cat5_flag cat5_flags @@ -928,6 +934,7 @@ cat5_tex: T_TEX { if (instr->flags & IR3_INSTR_B) instr->cat5. 
cat5_type: '(' type ')' { instr->cat5.type = $2; } cat5_instr: cat5_opc_dsxypp cat5_flags dst_reg ',' src_reg +| cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' src_reg | cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' cat5_samp ',' cat5_tex | cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' cat5_samp | cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' cat5_tex diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c index 27c2f68..154d264 100644 --- a/src/freedreno/ir3/tests/disasm.c +++ b/src/freedreno/ir3/tests/disasm.c @@ -307,6 +307,17 @@ static const struct test { /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2d */ INSTR_6XX(a0c81f07_8100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x", .parse_fail=true), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */ + /* NonUniform: */ + /* dEQP-VK.descriptor_indexing.storage_buffer */ + INSTR_6XX(c0260c0a_0a61b180, "ldib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.z, r1.z"), + INSTR_6XX(d0260e0a_09677180, "(sy)stib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.y, r1.w"), + /* dEQP-VK.descriptor_indexing.uniform_texel_buffer */ + INSTR_6XX(a0481f00_40000405, "isaml.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.z, r0.x"), + /* dEQP-VK.descriptor_indexing.storage_image */ + INSTR_6XX(d0360c04_02640b81, "(sy)atomic.b.add.typed.2d.u32.1.nonuniform.base0 r1.x, r0.z, r1.z"), + /* dEQP-VK.descriptor_indexing.sampler */ + INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"), + /* Custom test since we've never seen the blob emit these. 
*/ INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"), INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"), diff --git a/src/freedreno/isa/encode.c b/src/freedreno/isa/encode.c index 5d809a3..d6922a3 100644 --- a/src/freedreno/isa/encode.c +++ b/src/freedreno/isa/encode.c @@ -178,6 +178,8 @@ extract_cat5_DESC_MODE(struct ir3_instruction *instr) if (instr->flags & IR3_INSTR_B) { if (instr->flags & IR3_INSTR_A1EN) { return CAT5_BINDLESS_A1_UNIFORM; + } else if (instr->flags & IR3_INSTR_NONUNIF) { + return CAT5_BINDLESS_NONUNIFORM; } else { return CAT5_BINDLESS_UNIFORM; } diff --git a/src/freedreno/isa/ir3-cat5.xml b/src/freedreno/isa/ir3-cat5.xml index 92697f6..e507932 100644 --- a/src/freedreno/isa/ir3-cat5.xml +++ b/src/freedreno/isa/ir3-cat5.xml @@ -58,7 +58,7 @@ SOFTWARE. The s2en (indirect) or bindless case - {SY}{JP}{NAME}{3D}{A}{O}{P}{S}{S2EN}{UNIFORM}{BASE} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SRC3}{A1} + {SY}{JP}{NAME}{3D}{A}{O}{P}{S}{S2EN}{UNIFORM}{NONUNIFORM}{BASE} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SRC3}{A1} @@ -75,6 +75,7 @@ SOFTWARE. + @@ -604,6 +605,14 @@ SOFTWARE. ({DESC_MODE} == 5) /* CAT5_BINDLESS_UNIFORM */ + + + ({DESC_MODE} == 2) /* CAT5_BINDLESS_NONUNIFORM */ || + ({DESC_MODE} == 3) /* CAT5_BINDLESS_A1_NONUNIFORM */ + + bindless/indirect src3, which can either be GPR or samp/tex