The initial logic was to remember the place where SPI_SHADER_PGM_LO_*
are written, then assume that we can get the register offset because
the sequence would always be:
PKT3_SET_SH_REG
SPI_SHADER_PGM_LO_* register offset
VA low 32 bits value <- reg_va_low_idx
The problem is that this sequence isn't guaranteed; for instance, we
can get this instead:
0
c0067600 |
1
00000046 |
2
003ffffd | SPI_SHADER_PGM_RSRC3_VS
3
00000020 | SPI_SHADER_LATE_ALLOC_VS
4 *
00002080 | SPI_SHADER_PGM_LO_VS
5
00000080 | SPI_SHADER_PGM_HI_VS
So the assert in si_state_draw.cpp would fail, and so would the VA
update logic.
So instead, remember the SPI_SHADER_PGM_LO_* register offset and the
low 32 bits of the VA in si_update_shaders.
Fixes:
8034a71430b ("radeonsi/sqtt: re-export shaders in a single bo")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26774>
(cherry picked from commit
b55a2065e03e0f033217b1b58a0c18e3a5e86136)
"description": "radeonsi/sqtt: rework pm4.reg_va_low_idx",
"nominated": true,
"nomination_type": 1,
- "resolution": 0,
+ "resolution": 1,
"main_sha": null,
"because_sha": "8034a71430be0b6473449028d90937729b77d6d9",
"notes": null
if (strstr(ac_get_register_name(state->screen->info.gfx_level,
state->screen->info.family, reg_offset),
"SPI_SHADER_PGM_LO_")) {
- state->reg_va_low_idx = get_packed_reg_valueN_idx(state, i);
+ state->spi_shader_pgm_lo_reg = reg_offset;
break;
}
}
if (strstr(ac_get_register_name(state->screen->info.gfx_level,
state->screen->info.family, reg_base_offset + i * 4),
"SPI_SHADER_PGM_LO_")) {
- state->reg_va_low_idx = state->last_pm4 + 2 + i;
+ state->spi_shader_pgm_lo_reg = reg_base_offset + i * 4;
+
break;
}
}
uint16_t max_dw;
/* Used by SQTT to override the shader address */
- uint16_t reg_va_low_idx;
+ uint32_t spi_shader_pgm_lo_reg;
/* This must be the last field because the array can continue after the structure. */
uint32_t pm4[64];
struct si_pm4_state *pm4 = &shader->pm4;
- uint32_t va_low = (pipeline->bo->gpu_address + pipeline->offset[i]) >> 8;
- assert(PKT3_IT_OPCODE_G(pm4->pm4[pm4->reg_va_low_idx - 2]) == PKT3_SET_SH_REG);
- uint32_t reg = (pm4->pm4[pm4->reg_va_low_idx - 1] << 2) + SI_SH_REG_OFFSET;
+ uint64_t va_low = (pipeline->bo->gpu_address + pipeline->offset[i]) >> 8;
+ uint32_t reg = pm4->spi_shader_pgm_lo_reg;
si_pm4_set_reg(&pipeline->pm4, reg, va_low);
}
}
assert(0);
}
- assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.reg_va_low_idx != 0);
+ assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.spi_shader_pgm_lo_reg != 0);
}
static void si_clear_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,