From c7c186590c54630f6c579121c823208e9bf91de0 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Thu, 10 Nov 2022 15:38:28 +0100 Subject: [PATCH] freedreno,tu: Update SP_FS_PREFETCH,SP_FS_PREFETCH_CNTL regs definition Reverse engineer more fields of these regs. Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/.gitlab-ci/reference/crash.log | 16 +++++----- ...w.indexed.indirect_draw_count.triangle_list.log | 4 +-- src/freedreno/.gitlab-ci/reference/fd-clouds.log | 8 ++--- src/freedreno/ir3/ir3_compiler_nir.c | 6 ++-- src/freedreno/ir3/ir3_shader.c | 5 ++-- src/freedreno/ir3/ir3_shader.h | 3 +- src/freedreno/registers/adreno/a6xx.xml | 34 +++++++++++++--------- src/freedreno/registers/adreno/adreno_common.xml | 20 +++++++++++++ src/freedreno/vulkan/tu_pipeline.c | 19 ++++++++++-- src/gallium/drivers/freedreno/a6xx/fd6_program.c | 19 ++++++++++-- 10 files changed, 95 insertions(+), 39 deletions(-) diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log index 889a8ba..cb971c4 100644 --- a/src/freedreno/.gitlab-ci/reference/crash.log +++ b/src/freedreno/.gitlab-ci/reference/crash.log @@ -7402,10 +7402,10 @@ clusters: 00000000 SP_FS_MRT[0x6].REG: { COLOR_FORMAT = 0 } 00000000 SP_FS_MRT[0x7].REG: { COLOR_FORMAT = 0 } 00000000 SP_FS_PREFETCH_CNTL: { COUNT = 0 } - 03c00000 SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 } - 03c00000 SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 } - 03c00000 SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 } - 03c00000 SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 } + 03c00000 SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 } + 03c00000 SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | 
WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 } + 03c00000 SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 } + 03c00000 SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 } 00000000 SP_FS_BINDLESS_PREFETCH[0].CMD: { SAMP_ID = 0 | TEX_ID = 0 } 00000000 SP_FS_BINDLESS_PREFETCH[0x1].CMD: { SAMP_ID = 0 | TEX_ID = 0 } 00000000 SP_FS_BINDLESS_PREFETCH[0x2].CMD: { SAMP_ID = 0 | TEX_ID = 0 } @@ -7484,10 +7484,10 @@ clusters: 00000000 SP_FS_MRT[0x6].REG: { COLOR_FORMAT = 0 } 00000000 SP_FS_MRT[0x7].REG: { COLOR_FORMAT = 0 } 00000000 SP_FS_PREFETCH_CNTL: { COUNT = 0 } - 03c00000 SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 } - 03c00000 SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 } - 03c00000 SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 } - 03c00000 SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 } + 03c00000 SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 } + 03c00000 SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 } + 03c00000 SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 } + 03c00000 SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 } 00000000 SP_FS_BINDLESS_PREFETCH[0].CMD: { SAMP_ID = 0 | TEX_ID = 0 } 00000000 SP_FS_BINDLESS_PREFETCH[0x1].CMD: { SAMP_ID = 0 | TEX_ID = 0 } 00000000 SP_FS_BINDLESS_PREFETCH[0x2].CMD: { SAMP_ID = 0 | TEX_ID = 0 } diff --git a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log 
b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log index a8e93da..b3ca7d2 100644 --- a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log +++ b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log @@ -996,7 +996,7 @@ t4 write VPC_VARYING_PS_REPL[0].MODE (9208) 0000000001054320: 0000: 48920808 00000000 00000000 00000000 00000000 00000000 00000000 00000000 * t4 write SP_FS_PREFETCH_CNTL (a99e) - SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 } + SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff } 0000000001054344: 0000: 40a99e01 00007fc0 t4 write HLSQ_CONTROL_1_REG (b982) HLSQ_CONTROL_1_REG: 0x7 @@ -1515,7 +1515,7 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords) !+ 000000fc SP_FS_OUTPUT[0x6].REG: { REGID = r63.x } !+ 000000fc SP_FS_OUTPUT[0x7].REG: { REGID = r63.x } !+ 00000030 SP_FS_MRT[0].REG: { COLOR_FORMAT = FMT6_8_8_8_8_UNORM } -!+ 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 } +!+ 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff } + 00000000 SP_CS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 } !+ 00000100 SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } !+ 00000001 SP_FS_INSTRLEN: 1 diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log index 34b8259..8121d23 100644 --- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log +++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log @@ -606,7 +606,7 @@ t4 write SP_HS_OBJ_FIRST_EXEC_OFFSET (a833) SP_HS_OBJ_FIRST_EXEC_OFFSET: 0 0000000001121000: 0000: 40a83301 00000000 t4 write SP_FS_PREFETCH_CNTL (a99e) - SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 } + SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff } 0000000001121008: 0000: 40a99e01 00007fc0 t4 write SP_UNKNOWN_A9A8 (a9a8) SP_UNKNOWN_A9A8: 0 @@ -1116,7 +1116,7 @@ t7 
opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) !+ 000000fc SP_FS_OUTPUT[0x6].REG: { REGID = r63.x } !+ 000000fc SP_FS_OUTPUT[0x7].REG: { REGID = r63.x } !+ 00000031 SP_FS_MRT[0].REG: { COLOR_FORMAT = FMT6_8_8_8_X8_UNORM } -!+ 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 } +!+ 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff } + 00000000 SP_UNKNOWN_A9A8: 0 !+ 00000005 SP_MODE_CONTROL: { CONSTANT_DEMOTION_ENABLE | ISAMMODE = ISAMMODE_GL } !+ 00000100 SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } @@ -1900,7 +1900,7 @@ t4 write SP_HS_OBJ_FIRST_EXEC_OFFSET (a833) SP_HS_OBJ_FIRST_EXEC_OFFSET: 0 0000000001120000: 0000: 40a83301 00000000 t4 write SP_FS_PREFETCH_CNTL (a99e) - SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 } + SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff } 0000000001120008: 0000: 40a99e01 00007fc0 t4 write SP_UNKNOWN_A9A8 (a9a8) SP_UNKNOWN_A9A8: 0 @@ -6738,7 +6738,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) !+ 00000004 SP_FS_OUTPUT[0x5].REG: { REGID = r1.x } !+ 00000004 SP_FS_OUTPUT[0x6].REG: { REGID = r1.x } !+ 00000004 SP_FS_OUTPUT[0x7].REG: { REGID = r1.x } - + 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 } + + 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff } + 00000000 SP_UNKNOWN_A9A8: 0 + 00000005 SP_MODE_CONTROL: { CONSTANT_DEMOTION_ENABLE | ISAMMODE = ISAMMODE_GL } + 00000100 SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 } diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 3db225e..2ed14ed 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -4575,18 +4575,18 @@ collect_tex_prefetches(struct ir3_context *ctx, struct ir3 *ir) &ctx->so->sampler_prefetch[idx]; idx++; - if (instr->flags & IR3_INSTR_B) { - fetch->cmd = IR3_SAMPLER_BINDLESS_PREFETCH_CMD; + fetch->bindless = instr->flags & IR3_INSTR_B; + if (fetch->bindless) { /* In bindless mode, the 
index is actually the base */ fetch->tex_id = instr->prefetch.tex_base; fetch->samp_id = instr->prefetch.samp_base; fetch->tex_bindless_id = instr->prefetch.tex; fetch->samp_bindless_id = instr->prefetch.samp; } else { - fetch->cmd = IR3_SAMPLER_PREFETCH_CMD; fetch->tex_id = instr->prefetch.tex; fetch->samp_id = instr->prefetch.samp; } + fetch->tex_opc = OPC_SAM; fetch->wrmask = instr->dsts[0]->wrmask; fetch->dst = instr->dsts[0]->num; fetch->src = instr->prefetch.input_offset; diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 02e615c..4368694 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -770,10 +770,11 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) for (i = 0; i < so->num_sampler_prefetch; i++) { const struct ir3_sampler_prefetch *fetch = &so->sampler_prefetch[i]; fprintf(out, - "@tex(%sr%d.%c)\tsrc=%u, samp=%u, tex=%u, wrmask=0x%x, cmd=%u\n", + "@tex(%sr%d.%c)\tsrc=%u, samp=%u, tex=%u, wrmask=0x%x, opc=%s\n", fetch->half_precision ? "h" : "", fetch->dst >> 2, "xyzw"[fetch->dst & 0x3], fetch -> src, fetch -> samp_id, - fetch -> tex_id, fetch -> wrmask, fetch -> cmd); + fetch -> tex_id, fetch -> wrmask, + disasm_a3xx_instr_name(fetch->tex_opc)); } const struct ir3_const_state *const_state = ir3_const_state(so); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index a644850..c21f56e 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -263,6 +263,7 @@ struct ir3_stream_output_info { */ struct ir3_sampler_prefetch { uint8_t src; + bool bindless; uint8_t samp_id; uint8_t tex_id; uint16_t samp_bindless_id; @@ -270,7 +271,7 @@ struct ir3_sampler_prefetch { uint8_t dst; uint8_t wrmask; uint8_t half_precision; - uint8_t cmd; + opc_t tex_opc; }; /* Configuration key used to identify a shader variant.. 
different diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index 7c6930d..5c379d4 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -3226,12 +3226,24 @@ to upconvert to 32b float internally? - - - - - + + + Seems to break derivatives when there is a helper invocation + in the quad. Though from tests it doesn't seem to be + a "disable helper invocations" flag. + + + + Bypass writing to regs and overwrite output with tex color. + TODO: How does it work with multiple prefetches? + + + + Doesn't seem to be a reg, size doesn't match and it doesn't do + anything observable. + + @@ -3241,14 +3253,10 @@ to upconvert to 32b float internally? - - + Results in color being zero + + + diff --git a/src/freedreno/registers/adreno/adreno_common.xml b/src/freedreno/registers/adreno/adreno_common.xml index 212839e..677896a 100644 --- a/src/freedreno/registers/adreno/adreno_common.xml +++ b/src/freedreno/registers/adreno/adreno_common.xml @@ -376,5 +376,25 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> + + Blob (v615) seems to only use SAM and I wasn't able to coerce + it to produce any other command. + Probably valid for a4xx+ but not enabled or tested on anything + but a6xx. 
+ + + Produces garbage + + + + + + + Causes reads from an invalid address + + Results in color being zero + + + diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 0993c96..ec249d3 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -1468,6 +1468,17 @@ tu6_emit_vpc(struct tu_cs *cs, tu6_emit_vpc_varying_modes(cs, fs, last_shader); } +static enum a6xx_tex_prefetch_cmd +tu6_tex_opc_to_prefetch_cmd(opc_t tex_opc) +{ + switch (tex_opc) { + case OPC_SAM: + return TEX_PREFETCH_SAM; + default: + unreachable("Unknown tex opc for prefeth cmd"); + } +} + void tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) { @@ -1494,8 +1505,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch); tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) | - A6XX_SP_FS_PREFETCH_CNTL_UNK4(regid(63, 0)) | - 0x7000); // XXX); + COND(!VALIDREG(ij_regid[IJ_PERSP_PIXEL]), + A6XX_SP_FS_PREFETCH_CNTL_IJ_WRITE_DISABLE)); for (int i = 0; i < fs->num_sampler_prefetch; i++) { const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i]; tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CMD_SRC(prefetch->src) | @@ -1504,7 +1515,9 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) | A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) | COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) | - A6XX_SP_FS_PREFETCH_CMD_CMD(prefetch->cmd)); + COND(prefetch->bindless, A6XX_SP_FS_PREFETCH_CMD_BINDLESS) | + A6XX_SP_FS_PREFETCH_CMD_CMD( + tu6_tex_opc_to_prefetch_cmd(prefetch->tex_opc))); } if (fs->num_sampler_prefetch > 0) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index da3cd39..f75f427 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ 
b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -388,6 +388,17 @@ fd6_emit_tess_bos(struct fd_screen *screen, struct fd_ringbuffer *ring, OUT_RELOC(ring, screen->tess_bo, 0, 0, 0); } +static enum a6xx_tex_prefetch_cmd +tex_opc_to_prefetch_cmd(opc_t tex_opc) +{ + switch (tex_opc) { + case OPC_SAM: + return TEX_PREFETCH_SAM; + default: + unreachable("Unknown tex opc for prefeth cmd"); + } +} + static void setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, struct fd6_program_state *state, @@ -539,8 +550,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, OUT_PKT4(ring, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch); OUT_RING(ring, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) | - A6XX_SP_FS_PREFETCH_CNTL_UNK4(regid(63, 0)) | - 0x7000); // XXX + COND(!VALIDREG(ij_regid[IJ_PERSP_PIXEL]), + A6XX_SP_FS_PREFETCH_CNTL_IJ_WRITE_DISABLE)); for (int i = 0; i < fs->num_sampler_prefetch; i++) { const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i]; OUT_RING(ring, @@ -550,7 +561,9 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) | A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) | COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) | - A6XX_SP_FS_PREFETCH_CMD_CMD(prefetch->cmd)); + COND(prefetch->bindless, A6XX_SP_FS_PREFETCH_CMD_BINDLESS) | + A6XX_SP_FS_PREFETCH_CMD_CMD( + tex_opc_to_prefetch_cmd(prefetch->tex_opc))); } OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A9A8, 1); -- 2.7.4