freedreno: Add more asserts for DST_OFF/NUM_UNIT in indirect const uploads.
author Eric Anholt <eric@anholt.net>
Fri, 31 Jul 2020 16:54:09 +0000 (09:54 -0700)
committer Marge Bot <eric+marge@anholt.net>
Wed, 5 Aug 2020 23:06:55 +0000 (23:06 +0000)
These are just empirical alignment numbers from looking at dEQP traces of
the blob driver (a330, a418, a540, a618, a630), with one exception noted
in the comments.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5990>

src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a4xx/fd4_emit.c
src/gallium/drivers/freedreno/a5xx/fd5_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_const.c

index 5d5be9d..a689a79 100644 (file)
@@ -79,13 +79,22 @@ fd3_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v
                uint32_t regid, uint32_t offset, uint32_t sizedwords,
                struct fd_bo *bo)
 {
+       uint32_t dst_off = regid / 2;
+       /* The blob driver aligns all const uploads dst_off to 64.  We've been
+        * successfully aligning to 8 vec4s as const_upload_unit so far with no
+        * ill effects.
+        */
+       assert(dst_off % 16 == 0);
+       uint32_t num_unit = sizedwords / 2;
+       assert(num_unit % 2 == 0);
+
        emit_const_asserts(ring, v, regid, sizedwords);
 
        OUT_PKT3(ring, CP_LOAD_STATE, 2);
-       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(dst_off) |
                        CP_LOAD_STATE_0_STATE_SRC(SS_INDIRECT) |
                        CP_LOAD_STATE_0_STATE_BLOCK(sb[v->type]) |
-                       CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
+                       CP_LOAD_STATE_0_NUM_UNIT(num_unit));
        OUT_RELOC(ring, bo, offset,
                        CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
 }
index 603a81f..9c9e2a1 100644 (file)
@@ -74,13 +74,18 @@ fd4_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v
                uint32_t regid, uint32_t offset, uint32_t sizedwords,
                struct fd_bo *bo)
 {
+       uint32_t dst_off = regid / 4;
+       assert(dst_off % 4 == 0);
+       uint32_t num_unit = sizedwords / 4;
+       assert(num_unit % 4 == 0);
+
        emit_const_asserts(ring, v, regid, sizedwords);
 
        OUT_PKT3(ring, CP_LOAD_STATE4, 2);
-       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) |
+       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) |
                        CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) |
                        CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
-                       CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4));
+                       CP_LOAD_STATE4_0_NUM_UNIT(num_unit));
        OUT_RELOC(ring, bo, offset,
                        CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0);
 }
index 1a1037c..d1e86a5 100644 (file)
@@ -77,13 +77,18 @@ static void
 fd5_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t regid, uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
 {
+       uint32_t dst_off = regid / 4;
+       assert(dst_off % 4 == 0);
+       uint32_t num_unit = sizedwords / 4;
+       assert(num_unit % 4 == 0);
+
        emit_const_asserts(ring, v, regid, sizedwords);
 
        OUT_PKT7(ring, CP_LOAD_STATE4, 3);
-       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) |
+       OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) |
                        CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) |
                        CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) |
-                       CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4));
+                       CP_LOAD_STATE4_0_NUM_UNIT(num_unit));
        OUT_RELOC(ring, bo, offset,
                        CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0);
 }
index 419a22d..68a2487 100644 (file)
@@ -77,16 +77,21 @@ fd6_emit_const_bo(struct fd_ringbuffer *ring,
                const struct ir3_shader_variant *v, uint32_t regid,
                uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
 {
+       uint32_t dst_off = regid / 4;
+       assert(dst_off % 4 == 0);
+       uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4);
+       assert(num_unit % 4 == 0);
+
        emit_const_asserts(ring, v, regid, sizedwords);
 
        if (fd6_geom_stage(v->type)) {
                OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
                                CP_LOAD_STATE6_0(
-                                       .dst_off     = regid/4,
+                                       .dst_off     = dst_off,
                                        .state_type  = ST6_CONSTANTS,
                                        .state_src   = SS6_INDIRECT,
                                        .state_block = fd6_stage2shadersb(v->type),
-                                       .num_unit    = DIV_ROUND_UP(sizedwords, 4)
+                                       .num_unit    = num_unit,
                                        ),
                                CP_LOAD_STATE6_EXT_SRC_ADDR(
                                        .bo          = bo,
@@ -96,11 +101,11 @@ fd6_emit_const_bo(struct fd_ringbuffer *ring,
        } else {
                OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
                                CP_LOAD_STATE6_0(
-                                       .dst_off     = regid/4,
+                                       .dst_off     = dst_off,
                                        .state_type  = ST6_CONSTANTS,
                                        .state_src   = SS6_INDIRECT,
                                        .state_block = fd6_stage2shadersb(v->type),
-                                       .num_unit    = DIV_ROUND_UP(sizedwords, 4)
+                                       .num_unit    = num_unit,
                                        ),
                                CP_LOAD_STATE6_EXT_SRC_ADDR(
                                        .bo          = bo,