ir3: Add nonuniform encodings to ir3 encoder and parser
author Hyunjun Ko <zzoon@igalia.com>
Tue, 9 Mar 2021 05:57:53 +0000 (05:57 +0000)
committer Marge Bot <eric+marge@anholt.net>
Wed, 17 Mar 2021 01:09:30 +0000 (01:09 +0000)
This is done by keeping track of nonuniform access from NIR and storing it in ir3.

Signed-off-by: Hyunjun Ko <zzoon@igalia.com>
Reviewed-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9125>

src/freedreno/ir3/ir3_a6xx.c
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_context.h
src/freedreno/ir3/ir3_parser.y
src/freedreno/ir3/tests/disasm.c
src/freedreno/isa/encode.c
src/freedreno/isa/ir3-cat5.xml

index d4ccbf8..412a752 100644 (file)
@@ -56,6 +56,7 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
        ldib->barrier_class = IR3_BARRIER_BUFFER_R;
        ldib->barrier_conflict = IR3_BARRIER_BUFFER_W;
        ir3_handle_bindless_cat6(ldib, intr->src[0]);
+       ir3_handle_nonuniform(ldib, intr);
 
        ir3_split_dest(b, dst, ldib, 0, intr->num_components);
 }
@@ -83,6 +84,7 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        stib->barrier_class = IR3_BARRIER_BUFFER_W;
        stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
        ir3_handle_bindless_cat6(stib, intr->src[1]);
+       ir3_handle_nonuniform(stib, intr);
 
        array_insert(b, b->keeps, stib);
 }
@@ -214,6 +216,7 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
        ldib->barrier_class = IR3_BARRIER_IMAGE_R;
        ldib->barrier_conflict = IR3_BARRIER_IMAGE_W;
        ir3_handle_bindless_cat6(ldib, intr->src[0]);
+       ir3_handle_nonuniform(ldib, intr);
 
        ir3_split_dest(b, dst, ldib, 0, intr->num_components);
 }
@@ -242,6 +245,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        stib->barrier_class = IR3_BARRIER_IMAGE_W;
        stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
        ir3_handle_bindless_cat6(stib, intr->src[0]);
+       ir3_handle_nonuniform(stib, intr);
 
        array_insert(b, b->keeps, stib);
 }
index 71f57cc..955c097 100644 (file)
 #include "ir3_context.h"
 
 void
+ir3_handle_nonuniform(struct ir3_instruction *instr, nir_intrinsic_instr *intrin)
+{      /* Set IR3_INSTR_NONUNIF on instr when the NIR intrinsic's access flags include ACCESS_NON_UNIFORM. */
+       if (nir_intrinsic_has_access(intrin) &&  /* guard: only read the access flags when nir_intrinsic_has_access() is true */
+                       (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)) {
+               instr->flags |= IR3_INSTR_NONUNIF;  /* flags is only OR-ed into; existing bits are kept */
+       }
+}
+
+void
 ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc)
 {
        nir_intrinsic_instr *intrin = ir3_bindless_resource(rsrc);
@@ -741,6 +750,7 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr,
        ir3_handle_bindless_cat6(ldc, intr->src[0]);
        if (ldc->flags & IR3_INSTR_B)
                ctx->so->bindless_ubo = true;
+       ir3_handle_nonuniform(ldc, intr);
 
        ir3_split_dest(b, dst, ldc, 0, ncomp);
 }
@@ -1233,6 +1243,8 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
        sam = emit_sam(ctx, OPC_ISAM, info, type, 0b1111,
                                   ir3_create_collect(ctx, coords, ncoords), NULL);
 
+       ir3_handle_nonuniform(sam, intr);
+
        sam->barrier_class = IR3_BARRIER_IMAGE_R;
        sam->barrier_conflict = IR3_BARRIER_IMAGE_W;
 
@@ -2093,6 +2105,9 @@ get_tex_samp_tex_src(struct ir3_context *ctx, nir_tex_instr *tex)
                /* Bindless case */
                info.flags |= IR3_INSTR_B;
 
+               if (tex->texture_non_uniform || tex->sampler_non_uniform)
+                       info.flags |= IR3_INSTR_NONUNIF;
+
                /* Gather information required to determine which encoding to
                 * choose as well as for prefetch.
                 */
index b26159e..2a0066e 100644 (file)
@@ -183,6 +183,7 @@ struct ir3_instruction * ir3_create_collect(struct ir3_context *ctx,
 void ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
                struct ir3_instruction *src, unsigned base, unsigned n);
 void ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc);
+void ir3_handle_nonuniform(struct ir3_instruction *instr, nir_intrinsic_instr *intrin);
 void emit_intrinsic_image_size_tex(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                struct ir3_instruction **dst);
 
index 8aad0c6..c379daf 100644 (file)
@@ -181,7 +181,12 @@ static void fixup_cat5_s2en(void)
         * fix things up.
         */
        struct ir3_register *s2en_src = instr->regs[instr->regs_count - 1];
-       assert(s2en_src->flags & IR3_REG_HALF);
+
+       if (instr->flags & IR3_INSTR_B)
+               assert(!(s2en_src->flags & IR3_REG_HALF));
+       else
+               assert(s2en_src->flags & IR3_REG_HALF);
+
        for (int i = 1; i < instr->regs_count - 1; i++) {
                instr->regs[i+1] = instr->regs[i];
        }
@@ -919,6 +924,7 @@ cat5_flag:         '.' T_3D       { instr->flags |= IR3_INSTR_3D; }
 |                  '.' 'p'        { instr->flags |= IR3_INSTR_P; }
 |                  '.' 's'        { instr->flags |= IR3_INSTR_S; }
 |                  '.' T_S2EN     { instr->flags |= IR3_INSTR_S2EN; }
+|                  '.' T_NONUNIFORM  { instr->flags |= IR3_INSTR_NONUNIF; }
 |                  '.' T_BASE     { instr->flags |= IR3_INSTR_B; instr->cat5.tex_base = $2; }
 cat5_flags:
 |                  cat5_flag cat5_flags
@@ -928,6 +934,7 @@ cat5_tex:          T_TEX          { if (instr->flags & IR3_INSTR_B) instr->cat5.
 cat5_type:         '(' type ')'   { instr->cat5.type = $2; }
 
 cat5_instr:        cat5_opc_dsxypp cat5_flags dst_reg ',' src_reg
+|                  cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' src_reg
 |                  cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' cat5_samp ',' cat5_tex
 |                  cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' cat5_samp
 |                  cat5_opc cat5_flags cat5_type dst_reg ',' src_reg ',' src_reg ',' cat5_tex
index 27c2f68..154d264 100644 (file)
@@ -307,6 +307,17 @@ static const struct test {
        /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2d */
        INSTR_6XX(a0c81f07_8100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x", .parse_fail=true), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */
 
+       /* NonUniform: */
+       /* dEQP-VK.descriptor_indexing.storage_buffer */
+       INSTR_6XX(c0260c0a_0a61b180, "ldib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.z, r1.z"),
+       INSTR_6XX(d0260e0a_09677180, "(sy)stib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.y, r1.w"),
+       /* dEQP-VK.descriptor_indexing.uniform_texel_buffer */
+       INSTR_6XX(a0481f00_40000405, "isaml.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.z, r0.x"),
+       /* dEQP-VK.descriptor_indexing.storage_image */
+       INSTR_6XX(d0360c04_02640b81, "(sy)atomic.b.add.typed.2d.u32.1.nonuniform.base0 r1.x, r0.z, r1.z"),
+       /* dEQP-VK.descriptor_indexing.sampler */
+       INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"),
+
        /* Custom test since we've never seen the blob emit these. */
        INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"),
        INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"),
index 5d809a3..d6922a3 100644 (file)
@@ -178,6 +178,8 @@ extract_cat5_DESC_MODE(struct ir3_instruction *instr)
                if (instr->flags & IR3_INSTR_B) {
                        if (instr->flags & IR3_INSTR_A1EN) {
                                return CAT5_BINDLESS_A1_UNIFORM;
+                       } else if (instr->flags & IR3_INSTR_NONUNIF) {
+                               return CAT5_BINDLESS_NONUNIFORM;
                        } else {
                                return CAT5_BINDLESS_UNIFORM;
                        }
index 92697f6..e507932 100644 (file)
@@ -58,7 +58,7 @@ SOFTWARE.
                        The s2en (indirect) or bindless case
                </doc>
                <display>
-                       {SY}{JP}{NAME}{3D}{A}{O}{P}{S}{S2EN}{UNIFORM}{BASE} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SRC3}{A1}
+                       {SY}{JP}{NAME}{3D}{A}{O}{P}{S}{S2EN}{UNIFORM}{NONUNIFORM}{BASE} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SRC3}{A1}
                </display>
                <field name="BASE_HI" low="19" high="20" type="uint"/>
                <field name="SRC3" low="21" high="28" type="#cat5-src3">
@@ -75,6 +75,7 @@ SOFTWARE.
                <derived name="BINDLESS" expr="#cat5-s2enb-is-bindless" type="bool"/>
                <derived name="S2EN" expr="#cat5-s2enb-is-indirect" type="bool" display=".s2en"/>
                <derived name="UNIFORM" expr="#cat5-s2enb-is-uniform" type="bool" display=".uniform"/>
+               <derived name="NONUNIFORM" expr="#cat5-s2enb-is-nonuniform" type="bool" display=".nonuniform"/>
                <derived name="A1" expr="#cat5-s2enb-uses_a1" type="bool" display=", a1.x"/>
        </override>
 
@@ -604,6 +605,14 @@ SOFTWARE.
        ({DESC_MODE} == 5) /* CAT5_BINDLESS_UNIFORM */
 </expr>
 
+<!-- Helper to map s2en/bindless DESC_MODE to whether it is a non-uniform mode.
+        Note that it returns true only for the bindless modes for now, since we still need
+        to figure out the bindful uniform/nonuniform modes correctly. See TODO in
+        extract_cat5_DESC_MODE in encode.c -->
+<expr name="#cat5-s2enb-is-nonuniform">
+       ({DESC_MODE} == 2) /* CAT5_BINDLESS_NONUNIFORM */ ||
+       ({DESC_MODE} == 3) /* CAT5_BINDLESS_A1_NONUNIFORM */
+</expr>
+
 <bitset name="#cat5-src3" size="8">
        <doc>bindless/indirect src3, which can either be GPR or samp/tex</doc>
        <override expr="#cat5-s2enb-is-indirect">