freedreno/ir3: Align driver param upload size/offset for indirect uploads.
author: Emma Anholt <emma@anholt.net>
Wed, 18 Aug 2021 19:49:10 +0000 (12:49 -0700)
committer: Emma Anholt <emma@anholt.net>
Thu, 19 Aug 2021 21:43:06 +0000 (14:43 -0700)
For indirect draws, we have to upload some of the params as indirect
references, which have stricter size and offset alignment requirements
than immediate constant uploads.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12455>

src/freedreno/ci/deqp-freedreno-a530-fails.txt
src/freedreno/ci/deqp-freedreno-a630-fails.txt
src/freedreno/ir3/ir3_nir.c
src/gallium/drivers/freedreno/ci/piglit-freedreno-a530-fails.txt
src/gallium/drivers/freedreno/ir3/ir3_const.h

index fa46db2..24fdfe5 100644 (file)
@@ -142,17 +142,12 @@ KHR-GLES31.core.arrays_of_arrays.InteractionArgumentAliasing6,Crash
 KHR-GLES31.core.arrays_of_arrays.InteractionFunctionCalls1,Fail
 KHR-GLES31.core.arrays_of_arrays.InteractionFunctionCalls2,Fail
 
-# "gl_NumWorkGroups: Invalid data at index 0"
-KHR-GLES31.core.compute_shader.built-in-variables,Fail
-
 # "Got red: 1, expected 0.00392157, at (1, 0)"
 KHR-GLES31.core.compute_shader.resource-image,Fail
 
 # "../src/gallium/drivers/freedreno/a5xx/fd5_emit.c:82: fd5_emit_const_bo: Assertion `dst_off % 4 == 0' failed."
-KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-arrays,Crash
-KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-elements,Crash
-KHR-GLES31.core.draw_indirect.basic-drawArrays-vertexIds,Crash
-KHR-GLES31.core.draw_indirect.basic-drawElements-vertexIds,Crash
+KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-arrays,Fail
+KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-elements,Fail
 
 # "drawTestCompute failed expected: RGBA(4, 3, 2, 1) actual: RGBA(0, 255, 0, 255)"
 KHR-GLES31.core.layout_binding.sampler2DArray_layout_binding_texture_ComputeShader,Fail
index a57f02a..767bb04 100644 (file)
@@ -25,9 +25,6 @@ KHR-GLES31.core.gpu_shader5.fma_precision_vec2,Fail
 KHR-GLES31.core.gpu_shader5.fma_precision_vec3,Fail
 KHR-GLES31.core.gpu_shader5.fma_precision_vec4,Fail
 
-# "gl_NumWorkGroups: Invalid data at index 2"
-KHR-GLES31.core.compute_shader.built-in-variables,Fail
-
 # "Got red: 1, expected 0.00392157, at (1, 0)"
 KHR-GLES31.core.compute_shader.resource-image,Fail
 
index 34fd0ec..25dd0f5 100644 (file)
@@ -860,9 +860,6 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
 
    const_state->num_ubos = nir->info.num_ubos;
 
-   /* num_driver_params is scalar, align to vec4: */
-   const_state->num_driver_params = align(const_state->num_driver_params, 4);
-
    debug_assert((const_state->ubo_state.size % 16) == 0);
    unsigned constoff = const_state->ubo_state.size / 16;
    unsigned ptrsz = ir3_pointer_size(compiler);
@@ -879,12 +876,26 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
    }
 
    if (const_state->num_driver_params > 0) {
+      /* num_driver_params in dwords.  we only need to align to vec4s for the
+       * common case of immediate constant uploads, but for indirect dispatch
+       * the constants may also be indirect and so we have to align the area in
+       * const space to that requirement.
+       */
+      const_state->num_driver_params = align(const_state->num_driver_params, 4);
+      unsigned upload_unit = 1;
+      if (v->type == MESA_SHADER_COMPUTE ||
+          (const_state->num_driver_params >= IR3_DP_VTXID_BASE)) {
+         upload_unit = compiler->const_upload_unit;
+      }
+
       /* offset cannot be 0 for vs params loaded by CP_DRAW_INDIRECT_MULTI */
       if (v->type == MESA_SHADER_VERTEX && compiler->gen >= 6)
          constoff = MAX2(constoff, 1);
+      constoff = align(constoff, upload_unit);
       const_state->offsets.driver_param = constoff;
+
+      constoff += align(const_state->num_driver_params / 4, upload_unit);
    }
-   constoff += const_state->num_driver_params / 4;
 
    if ((v->type == MESA_SHADER_VERTEX) && (compiler->gen < 5) &&
        v->shader->stream_output.num_outputs > 0) {
index d6a4447..1f83f45 100644 (file)
@@ -103,8 +103,6 @@ spec@arb_depth_buffer_float@fbo-depthstencil-gl_depth32f_stencil8-copypixels,Fai
 spec@arb_depth_buffer_float@fbo-stencil-gl_depth32f_stencil8-blit,Fail
 spec@arb_depth_buffer_float@fbo-stencil-gl_depth32f_stencil8-copypixels,Fail
 spec@arb_direct_state_access@gettextureimage-formats,Crash
-spec@arb_draw_indirect@gl_vertexid used with gldrawarraysindirect,Crash
-spec@arb_draw_indirect@gl_vertexid used with gldrawelementsindirect,Crash
 spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-atomic,Fail
 spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-atomic@MS4,Fail
 spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query,Fail
@@ -120,7 +118,6 @@ spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl
 spec@arb_framebuffer_object@framebuffer-blit-levels draw stencil,Fail
 spec@arb_framebuffer_object@framebuffer-blit-levels read stencil,Fail
 spec@arb_map_buffer_alignment@arb_map_buffer_alignment-map-invalidate-range,Fail
-spec@arb_multi_draw_indirect@gl-3.0-multidrawarrays-vertexid -indirect,Crash
 spec@arb_occlusion_query@occlusion_query_order,Fail
 spec@arb_point_sprite@arb_point_sprite-interactions 1.0,Fail
 spec@arb_separate_shader_objects@400 combinations by location,Fail
index 85f5aef..ceed8c8 100644 (file)
@@ -479,9 +479,10 @@ ir3_emit_vs_driver_params(const struct ir3_shader_variant *v,
     * stream so need to copy them to bo.
     */
    if (indirect && needs_vtxid_base) {
+      uint32_t vertex_params_area = align(vertex_params_size, 16);
       struct pipe_resource *vertex_params_rsc =
          pipe_buffer_create(&ctx->screen->base, PIPE_BIND_CONSTANT_BUFFER,
-                            PIPE_USAGE_STREAM, vertex_params_size * 4);
+                            PIPE_USAGE_STREAM, vertex_params_area * 4);
       unsigned src_off = indirect->offset;
       ;
       void *ptr;
@@ -501,7 +502,7 @@ ir3_emit_vs_driver_params(const struct ir3_shader_variant *v,
       ctx->screen->mem_to_mem(ring, vertex_params_rsc, 0, indirect->buffer,
                               src_off, 1);
 
-      emit_const_prsc(ring, v, offset * 4, 0, vertex_params_size,
+      emit_const_prsc(ring, v, offset * 4, 0, vertex_params_area,
                       vertex_params_rsc);
 
       pipe_resource_reference(&vertex_params_rsc, NULL);