* Rob Clark <robclark@freedesktop.org>
*/
+#include "drm/freedreno_ringbuffer.h"
#define FD_BO_NO_HARDPIN 1
#include "pipe/p_state.h"
assert_dt
{
const struct ir3_info *i = &v->info;
- enum a6xx_threadsize thrsz = i->double_threadsize ? THREAD128 : THREAD64;
+ enum a6xx_threadsize thrsz_cs = i->double_threadsize ? THREAD128 : THREAD64;
OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true,
.ds_state = true, .gs_state = true,
OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG0, 1);
OUT_RING(ring,
- A6XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
+ A6XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz_cs) |
A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORKGROUP_ID);
+ enum a6xx_threadsize thrsz = ctx->screen->info->a6xx.supports_double_threadsize ? thrsz_cs : THREAD128;
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL_0, 2);
OUT_RING(ring, A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz));
+ if (!ctx->screen->info->a6xx.supports_double_threadsize) {
+ OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL_0, 1);
+ OUT_RING(ring, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(thrsz_cs));
+ }
if (ctx->screen->info->a6xx.has_lpac) {
OUT_PKT4(ring, REG_A6XX_SP_CS_CNTL_0, 2);
if (ctx->screen->gen >= 6)
ir3_nir_lower_io_to_bindless(nir);
+ enum ir3_wavesize_option api_wavesize = IR3_SINGLE_OR_DOUBLE;
+ enum ir3_wavesize_option real_wavesize = IR3_SINGLE_OR_DOUBLE;
+
+ if (ctx->screen->gen >= 6 && !ctx->screen->info->a6xx.supports_double_threadsize) {
+ api_wavesize = IR3_SINGLE_ONLY;
+ real_wavesize = IR3_SINGLE_ONLY;
+ }
+
struct ir3_shader *shader =
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
/* TODO: force to single on a6xx with legacy
* ballot extension that uses 64-bit masks
*/
- .api_wavesize = IR3_SINGLE_OR_DOUBLE,
- .real_wavesize = IR3_SINGLE_OR_DOUBLE,
+ .api_wavesize = api_wavesize,
+ .real_wavesize = real_wavesize,
}, NULL);
shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */
shader->cs.req_local_mem = cso->static_shared_mem;