From 56ab105182e5a3129ee646cf6382a5699bf14bce Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 31 Jul 2020 09:54:09 -0700 Subject: [PATCH] freedreno: Add more asserts for DST_OFF/NUM_UNIT in indirect const uploads. These are just empirical alignment numbers from looking at dEQP traces of the blob driver (a330, a418, a540, a618, a630), with one exception noted in the comments. Part-of: --- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 13 +++++++++++-- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 9 +++++++-- src/gallium/drivers/freedreno/a5xx/fd5_emit.c | 9 +++++++-- src/gallium/drivers/freedreno/a6xx/fd6_const.c | 13 +++++++++---- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 5d5be9d..a689a79 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -79,13 +79,22 @@ fd3_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v uint32_t regid, uint32_t offset, uint32_t sizedwords, struct fd_bo *bo) { + uint32_t dst_off = regid / 2; + /* The blob driver aligns all const uploads dst_off to 64. We've been + * successfully aligning to 8 vec4s as const_upload_unit so far with no + * ill effects. + */ + assert(dst_off % 16 == 0); + uint32_t num_unit = sizedwords / 2; + assert(num_unit % 2 == 0); + emit_const_asserts(ring, v, regid, sizedwords); OUT_PKT3(ring, CP_LOAD_STATE, 2); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(dst_off) | CP_LOAD_STATE_0_STATE_SRC(SS_INDIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb[v->type]) | - CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2)); + CP_LOAD_STATE_0_NUM_UNIT(num_unit)); OUT_RELOC(ring, bo, offset, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0); } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 603a81f..9c9e2a1 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -74,13 +74,18 @@ fd4_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v uint32_t regid, uint32_t offset, uint32_t sizedwords, struct fd_bo *bo) { + uint32_t dst_off = regid / 4; + assert(dst_off % 4 == 0); + uint32_t num_unit = sizedwords / 4; + assert(num_unit % 4 == 0); + emit_const_asserts(ring, v, regid, sizedwords); OUT_PKT3(ring, CP_LOAD_STATE4, 2); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) | + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) | CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) | CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) | - CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4)); + CP_LOAD_STATE4_0_NUM_UNIT(num_unit)); OUT_RELOC(ring, bo, offset, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0); } diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index 1a1037c..d1e86a5 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -77,13 +77,18 @@ static void fd5_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, uint32_t regid, uint32_t offset, uint32_t sizedwords, struct fd_bo *bo) { + uint32_t dst_off = regid / 4; + assert(dst_off % 4 == 0); + uint32_t num_unit = sizedwords / 4; + assert(num_unit % 4 == 0); + emit_const_asserts(ring, v, regid, sizedwords); OUT_PKT7(ring, CP_LOAD_STATE4, 3); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) | + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) | CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) | CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) | - CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4)); + CP_LOAD_STATE4_0_NUM_UNIT(num_unit)); OUT_RELOC(ring, bo, offset, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.c b/src/gallium/drivers/freedreno/a6xx/fd6_const.c index 419a22d..68a2487 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.c @@ -77,16 +77,21 @@ fd6_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, uint32_t regid, uint32_t offset, uint32_t sizedwords, struct fd_bo *bo) { + uint32_t dst_off = regid / 4; + assert(dst_off % 4 == 0); + uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4); + assert(num_unit % 4 == 0); + emit_const_asserts(ring, v, regid, sizedwords); if (fd6_geom_stage(v->type)) { OUT_PKT(ring, CP_LOAD_STATE6_GEOM, CP_LOAD_STATE6_0( - .dst_off = regid/4, + .dst_off = dst_off, .state_type = ST6_CONSTANTS, .state_src = SS6_INDIRECT, .state_block = fd6_stage2shadersb(v->type), - .num_unit = DIV_ROUND_UP(sizedwords, 4) + .num_unit = num_unit, ), CP_LOAD_STATE6_EXT_SRC_ADDR( .bo = bo, @@ -96,11 +101,11 @@ fd6_emit_const_bo(struct fd_ringbuffer *ring, } else { OUT_PKT(ring, CP_LOAD_STATE6_FRAG, CP_LOAD_STATE6_0( - .dst_off = regid/4, + .dst_off = dst_off, .state_type = ST6_CONSTANTS, .state_src = SS6_INDIRECT, .state_block = fd6_stage2shadersb(v->type), - .num_unit = DIV_ROUND_UP(sizedwords, 4) + .num_unit = num_unit, ), CP_LOAD_STATE6_EXT_SRC_ADDR( .bo = bo, -- 2.7.4