freedreno,tu: Update SP_FS_PREFETCH,SP_FS_PREFETCH_CNTL regs definition
author Danylo Piliaiev <dpiliaiev@igalia.com>
Thu, 10 Nov 2022 14:38:28 +0000 (15:38 +0100)
committer Marge Bot <emma+marge@anholt.net>
Tue, 22 Nov 2022 11:56:21 +0000 (11:56 +0000)
Reverse engineer more fields of these regs.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19652>

src/freedreno/.gitlab-ci/reference/crash.log
src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
src/freedreno/.gitlab-ci/reference/fd-clouds.log
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_shader.c
src/freedreno/ir3/ir3_shader.h
src/freedreno/registers/adreno/a6xx.xml
src/freedreno/registers/adreno/adreno_common.xml
src/freedreno/vulkan/tu_pipeline.c
src/gallium/drivers/freedreno/a6xx/fd6_program.c

index 889a8ba..cb971c4 100644 (file)
@@ -7402,10 +7402,10 @@ clusters:
        00000000        SP_FS_MRT[0x6].REG: { COLOR_FORMAT = 0 }
        00000000        SP_FS_MRT[0x7].REG: { COLOR_FORMAT = 0 }
        00000000        SP_FS_PREFETCH_CNTL: { COUNT = 0 }
-       03c00000        SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
-       03c00000        SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
-       03c00000        SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
-       03c00000        SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
+       03c00000        SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
+       03c00000        SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
+       03c00000        SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
+       03c00000        SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
        00000000        SP_FS_BINDLESS_PREFETCH[0].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
        00000000        SP_FS_BINDLESS_PREFETCH[0x1].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
        00000000        SP_FS_BINDLESS_PREFETCH[0x2].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
@@ -7484,10 +7484,10 @@ clusters:
        00000000        SP_FS_MRT[0x6].REG: { COLOR_FORMAT = 0 }
        00000000        SP_FS_MRT[0x7].REG: { COLOR_FORMAT = 0 }
        00000000        SP_FS_PREFETCH_CNTL: { COUNT = 0 }
-       03c00000        SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
-       03c00000        SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
-       03c00000        SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
-       03c00000        SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
+       03c00000        SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
+       03c00000        SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
+       03c00000        SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
+       03c00000        SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
        00000000        SP_FS_BINDLESS_PREFETCH[0].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
        00000000        SP_FS_BINDLESS_PREFETCH[0x1].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
        00000000        SP_FS_BINDLESS_PREFETCH[0x2].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
index a8e93da..b3ca7d2 100644 (file)
@@ -996,7 +996,7 @@ t4                                  write VPC_VARYING_PS_REPL[0].MODE (9208)
 0000000001054320:                                      0000: 48920808 00000000 00000000 00000000 00000000 00000000 00000000 00000000
 *
 t4                                     write SP_FS_PREFETCH_CNTL (a99e)
-                                               SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
+                                               SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
 0000000001054344:                                      0000: 40a99e01 00007fc0
 t4                                     write HLSQ_CONTROL_1_REG (b982)
                                                HLSQ_CONTROL_1_REG: 0x7
@@ -1515,7 +1515,7 @@ t7                        opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
 !+     000000fc                        SP_FS_OUTPUT[0x6].REG: { REGID = r63.x }
 !+     000000fc                        SP_FS_OUTPUT[0x7].REG: { REGID = r63.x }
 !+     00000030                        SP_FS_MRT[0].REG: { COLOR_FORMAT = FMT6_8_8_8_8_UNORM }
-!+     00007fc0                        SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
+!+     00007fc0                        SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
  +     00000000                        SP_CS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
 !+     00000100                        SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
 !+     00000001                        SP_FS_INSTRLEN: 1
index 34b8259..8121d23 100644 (file)
@@ -606,7 +606,7 @@ t4                                  write SP_HS_OBJ_FIRST_EXEC_OFFSET (a833)
                                                SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
 0000000001121000:                                      0000: 40a83301 00000000
 t4                                     write SP_FS_PREFETCH_CNTL (a99e)
-                                               SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
+                                               SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
 0000000001121008:                                      0000: 40a99e01 00007fc0
 t4                                     write SP_UNKNOWN_A9A8 (a9a8)
                                                SP_UNKNOWN_A9A8: 0
@@ -1116,7 +1116,7 @@ t7                        opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
 !+     000000fc                        SP_FS_OUTPUT[0x6].REG: { REGID = r63.x }
 !+     000000fc                        SP_FS_OUTPUT[0x7].REG: { REGID = r63.x }
 !+     00000031                        SP_FS_MRT[0].REG: { COLOR_FORMAT = FMT6_8_8_8_X8_UNORM }
-!+     00007fc0                        SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
+!+     00007fc0                        SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
  +     00000000                        SP_UNKNOWN_A9A8: 0
 !+     00000005                        SP_MODE_CONTROL: { CONSTANT_DEMOTION_ENABLE | ISAMMODE = ISAMMODE_GL }
 !+     00000100                        SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
@@ -1900,7 +1900,7 @@ t4                                        write SP_HS_OBJ_FIRST_EXEC_OFFSET (a833)
                                                SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
 0000000001120000:                                      0000: 40a83301 00000000
 t4                                     write SP_FS_PREFETCH_CNTL (a99e)
-                                               SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
+                                               SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
 0000000001120008:                                      0000: 40a99e01 00007fc0
 t4                                     write SP_UNKNOWN_A9A8 (a9a8)
                                                SP_UNKNOWN_A9A8: 0
@@ -6738,7 +6738,7 @@ t7                        opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
 !+     00000004                        SP_FS_OUTPUT[0x5].REG: { REGID = r1.x }
 !+     00000004                        SP_FS_OUTPUT[0x6].REG: { REGID = r1.x }
 !+     00000004                        SP_FS_OUTPUT[0x7].REG: { REGID = r1.x }
- +     00007fc0                        SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
+ +     00007fc0                        SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
  +     00000000                        SP_UNKNOWN_A9A8: 0
  +     00000005                        SP_MODE_CONTROL: { CONSTANT_DEMOTION_ENABLE | ISAMMODE = ISAMMODE_GL }
  +     00000100                        SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
index 3db225e..2ed14ed 100644 (file)
@@ -4575,18 +4575,18 @@ collect_tex_prefetches(struct ir3_context *ctx, struct ir3 *ir)
                &ctx->so->sampler_prefetch[idx];
             idx++;
 
-            if (instr->flags & IR3_INSTR_B) {
-               fetch->cmd = IR3_SAMPLER_BINDLESS_PREFETCH_CMD;
+            fetch->bindless = instr->flags & IR3_INSTR_B;
+            if (fetch->bindless) {
                /* In bindless mode, the index is actually the base */
                fetch->tex_id = instr->prefetch.tex_base;
                fetch->samp_id = instr->prefetch.samp_base;
                fetch->tex_bindless_id = instr->prefetch.tex;
                fetch->samp_bindless_id = instr->prefetch.samp;
             } else {
-               fetch->cmd = IR3_SAMPLER_PREFETCH_CMD;
                fetch->tex_id = instr->prefetch.tex;
                fetch->samp_id = instr->prefetch.samp;
             }
+            fetch->tex_opc = OPC_SAM;
             fetch->wrmask = instr->dsts[0]->wrmask;
             fetch->dst = instr->dsts[0]->num;
             fetch->src = instr->prefetch.input_offset;
index 02e615c..4368694 100644 (file)
@@ -770,10 +770,11 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
    for (i = 0; i < so->num_sampler_prefetch; i++) {
       const struct ir3_sampler_prefetch *fetch = &so->sampler_prefetch[i];
       fprintf(out,
-              "@tex(%sr%d.%c)\tsrc=%u, samp=%u, tex=%u, wrmask=0x%x, cmd=%u\n",
+              "@tex(%sr%d.%c)\tsrc=%u, samp=%u, tex=%u, wrmask=0x%x, opc=%s\n",
               fetch->half_precision ? "h" : "", fetch->dst >> 2,
               "xyzw"[fetch->dst & 0x3], fetch -> src, fetch -> samp_id,
-              fetch -> tex_id, fetch -> wrmask, fetch -> cmd);
+              fetch -> tex_id, fetch -> wrmask,
+              disasm_a3xx_instr_name(fetch->tex_opc));
    }
 
    const struct ir3_const_state *const_state = ir3_const_state(so);
index a644850..c21f56e 100644 (file)
@@ -263,6 +263,7 @@ struct ir3_stream_output_info {
  */
 struct ir3_sampler_prefetch {
    uint8_t src;
+   bool bindless;
    uint8_t samp_id;
    uint8_t tex_id;
    uint16_t samp_bindless_id;
@@ -270,7 +271,7 @@ struct ir3_sampler_prefetch {
    uint8_t dst;
    uint8_t wrmask;
    uint8_t half_precision;
-   uint8_t cmd;
+   opc_t tex_opc;
 };
 
 /* Configuration key used to identify a shader variant.. different
index 7c6930d..5c379d4 100644 (file)
@@ -3226,12 +3226,24 @@ to upconvert to 32b float internally?
        </array>
 
        <reg32 offset="0xa99e" name="SP_FS_PREFETCH_CNTL">
-               <!-- unknown bits 0x7fc0 always set -->
                <bitfield name="COUNT" low="0" high="2" type="uint"/>
-               <!-- b3 set if no other use of varyings in the shader itself.. maybe alternative to dummy bary.f? -->
-               <bitfield name="UNK3" pos="3" type="boolean"/>
-               <bitfield name="UNK4" low="4" high="11" type="a3xx_regid"/>
-               <bitfield name="UNK12" low="12" high="14"/>
+               <bitfield name="IJ_WRITE_DISABLE" pos="3" type="boolean"/>
+               <doc>
+                       Seems to break derivatives when there is a helper invocation
+                       in the quad, though from tests it doesn't seem to be a
+                       "disable helper invocations" flag.
+               </doc>
+               <bitfield name="UNK4" pos="4" type="boolean" />
+               <doc>
+                       Bypass writing to regs and overwrite output with tex color.
+                       TODO: How does it work with multiple prefetches?
+               </doc>
+               <bitfield name="WRITE_COLOR_TO_OUTPUT" pos="5" type="boolean"/>
+               <doc>
+                       Doesn't seem to be a reg, size doesn't match and it doesn't do
+                       anything observable.
+               </doc>
+               <bitfield name="UNK6" low="6" high="14" type="uint"/>
        </reg32>
        <array offset="0xa99f" name="SP_FS_PREFETCH" stride="1" length="4">
                <reg32 offset="0" name="CMD">
@@ -3241,14 +3253,10 @@ to upconvert to 32b float internally?
                        <bitfield name="DST" low="16" high="21" type="a3xx_regid"/>
                        <bitfield name="WRMASK" low="22" high="25" type="hex"/>
                        <bitfield name="HALF" pos="26" type="boolean"/>
-                       <!--
-                       CMD seems always 0x4??  3d, textureProj, textureLod seem to
-                       skip pre-fetch.. TODO test texelFetch
-                        CMD is 0x6 when the Vulkan mode is enabled, and
-                        TEX_ID/SAMP_ID refer to the descriptor sets while the
-                        indices come from SP_FS_BINDLESS_PREFETCH[n]
-                        -->
-                       <bitfield name="CMD" low="27" high="31"/>
+                       <doc>Results in color being zero</doc>
+                       <bitfield name="UNK27" pos="27" type="boolean"/>
+                       <bitfield name="BINDLESS" pos="28" type="boolean"/>
+                       <bitfield name="CMD" low="29" high="31" type="a6xx_tex_prefetch_cmd"/>
                </reg32>
        </array>
        <array offset="0xa9a3" name="SP_FS_BINDLESS_PREFETCH" stride="1" length="4">
index 212839e..677896a 100644 (file)
@@ -376,5 +376,25 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
        <value value="0x1"  name="RECTANGULAR"/>
 </enum>
 
+<doc>
+       Blob (v615) seems to only use SAM and I wasn't able to coerce
+       it to produce any other command.
+       Probably valid for a4xx+ but not enabled or tested on anything
+       but a6xx.
+</doc>
+<enum name="a6xx_tex_prefetch_cmd">
+       <doc> Produces garbage </doc>
+       <value value="0x0" name="TEX_PREFETCH_UNK0"/>
+       <value value="0x1" name="TEX_PREFETCH_SAM"/>
+       <value value="0x2" name="TEX_PREFETCH_GATHER4R"/>
+       <value value="0x3" name="TEX_PREFETCH_GATHER4G"/>
+       <value value="0x4" name="TEX_PREFETCH_GATHER4B"/>
+       <value value="0x5" name="TEX_PREFETCH_GATHER4A"/>
+       <doc> Causes reads from an invalid address </doc>
+       <value value="0x6" name="TEX_PREFETCH_UNK6"/>
+       <doc> Results in color being zero </doc>
+       <value value="0x7" name="TEX_PREFETCH_UNK7"/>
+</enum>
+
 </database>
 
index 0993c96..ec249d3 100644 (file)
@@ -1468,6 +1468,17 @@ tu6_emit_vpc(struct tu_cs *cs,
    tu6_emit_vpc_varying_modes(cs, fs, last_shader);
 }
 
+static enum a6xx_tex_prefetch_cmd
+tu6_tex_opc_to_prefetch_cmd(opc_t tex_opc)
+{
+   switch (tex_opc) { /* ir3 tex opcode -> SP_FS_PREFETCH CMD field value */
+   case OPC_SAM:
+      return TEX_PREFETCH_SAM;
+   default: /* no other opcode has a mapping here yet */
+      unreachable("Unknown tex opc for prefetch cmd");
+   }
+}
+
 void
 tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
 {
@@ -1494,8 +1505,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
 
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
    tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) |
-         A6XX_SP_FS_PREFETCH_CNTL_UNK4(regid(63, 0)) |
-         0x7000);    // XXX);
+                     COND(!VALIDREG(ij_regid[IJ_PERSP_PIXEL]),
+                          A6XX_SP_FS_PREFETCH_CNTL_IJ_WRITE_DISABLE));
    for (int i = 0; i < fs->num_sampler_prefetch; i++) {
       const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
       tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CMD_SRC(prefetch->src) |
@@ -1504,7 +1515,9 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
                      A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) |
                      A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) |
                      COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) |
-                     A6XX_SP_FS_PREFETCH_CMD_CMD(prefetch->cmd));
+                     COND(prefetch->bindless, A6XX_SP_FS_PREFETCH_CMD_BINDLESS) |
+                     A6XX_SP_FS_PREFETCH_CMD_CMD(
+                        tu6_tex_opc_to_prefetch_cmd(prefetch->tex_opc)));
    }
 
    if (fs->num_sampler_prefetch > 0) {
index da3cd39..f75f427 100644 (file)
@@ -388,6 +388,17 @@ fd6_emit_tess_bos(struct fd_screen *screen, struct fd_ringbuffer *ring,
    OUT_RELOC(ring, screen->tess_bo, 0, 0, 0);
 }
 
+static enum a6xx_tex_prefetch_cmd
+tex_opc_to_prefetch_cmd(opc_t tex_opc)
+{
+   switch (tex_opc) { /* ir3 tex opcode -> SP_FS_PREFETCH CMD field value */
+   case OPC_SAM:
+      return TEX_PREFETCH_SAM;
+   default: /* no other opcode has a mapping here yet */
+      unreachable("Unknown tex opc for prefetch cmd");
+   }
+}
+
 static void
 setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
                struct fd6_program_state *state,
@@ -539,8 +550,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
 
    OUT_PKT4(ring, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
    OUT_RING(ring, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) |
-                     A6XX_SP_FS_PREFETCH_CNTL_UNK4(regid(63, 0)) |
-                     0x7000); // XXX
+                     COND(!VALIDREG(ij_regid[IJ_PERSP_PIXEL]),
+                          A6XX_SP_FS_PREFETCH_CNTL_IJ_WRITE_DISABLE));
    for (int i = 0; i < fs->num_sampler_prefetch; i++) {
       const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
       OUT_RING(ring,
@@ -550,7 +561,9 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
                   A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) |
                   A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) |
                   COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) |
-                  A6XX_SP_FS_PREFETCH_CMD_CMD(prefetch->cmd));
+                  COND(prefetch->bindless, A6XX_SP_FS_PREFETCH_CMD_BINDLESS) |
+                  A6XX_SP_FS_PREFETCH_CMD_CMD(
+                     tex_opc_to_prefetch_cmd(prefetch->tex_opc)));
    }
 
    OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A9A8, 1);