vc4: Add simulator kernel validation for multithreaded fragment shaders.
author: Jonas Pfeil <pfeiljonas@gmx.de>
Fri, 11 Nov 2016 01:52:03 +0000 (17:52 -0800)
committer: Eric Anholt <eric@anholt.net>
Sun, 13 Nov 2016 03:21:46 +0000 (19:21 -0800)
This is Jonas Pfeil's code from the kernel, brought back to Mesa by
anholt.

src/gallium/drivers/vc4/kernel/vc4_drv.h
src/gallium/drivers/vc4/kernel/vc4_validate.c
src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c

index 90f4539..8f5ed00 100644 (file)
@@ -150,6 +150,8 @@ struct vc4_validated_shader_info
 
        uint32_t num_uniform_addr_offsets;
        uint32_t *uniform_addr_offsets;
+
+       bool is_threaded;
 };
 
 /* vc4_validate.c */
index a9dce1f..bd193b9 100644 (file)
@@ -780,11 +780,6 @@ validate_gl_shader_rec(struct drm_device *dev,
        exec->shader_rec_v += roundup(packet_size, 16);
        exec->shader_rec_size -= packet_size;
 
-       if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
-               DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
-               return -EINVAL;
-       }
-
        for (i = 0; i < shader_reloc_count; i++) {
                if (src_handles[i] > exec->bo_count) {
                        DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
@@ -801,6 +796,18 @@ validate_gl_shader_rec(struct drm_device *dev,
                        return -EINVAL;
        }
 
+       if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
+           to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
+               DRM_ERROR("Thread mode of CL and FS do not match\n");
+               return -EINVAL;
+       }
+
+       if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
+           to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
+               DRM_ERROR("cs and vs cannot be threaded\n");
+               return -EINVAL;
+       }
+
        for (i = 0; i < shader_reloc_count; i++) {
                struct vc4_validated_shader_info *validated_shader;
                uint32_t o = shader_reloc_offsets[i];
index 0ff3d01..d93f523 100644 (file)
@@ -84,6 +84,14 @@ struct vc4_shader_validation_state {
         * basic blocks.
         */
        bool needs_uniform_address_for_loop;
+
+       /* Set when we find an instruction which violates the criterion for a
+        * threaded shader. These are:
+        *      - only write the lower half of the register space
+        *      - last thread switch signaled at the end
+        * So track the usage of the thread switches and the register usage.
+        */
+       bool all_registers_used;
 };
 
 static uint32_t
@@ -119,6 +127,12 @@ raddr_add_a_to_live_reg_index(uint64_t inst)
                return ~0;
 }
 
+static bool live_reg_is_upper_half(uint32_t lri)
+{
+       return  (lri >=16 && lri < 32) ||
+               (lri >=32 + 16 && lri < 32 + 32);
+}
+
 static bool
 is_tmu_submit(uint32_t waddr)
 {
@@ -385,6 +399,9 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
                } else {
                        validation_state->live_immediates[lri] = ~0;
                }
+
+               if (live_reg_is_upper_half(lri))
+                       validation_state->all_registers_used = true;
        }
 
        switch (waddr) {
@@ -593,6 +610,11 @@ check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
                }
        }
 
+       if ((raddr_a >= 16 && raddr_a < 32) ||
+           (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
+               validation_state->all_registers_used = true;
+       }
+
        return true;
 }
 
@@ -748,6 +770,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 {
        bool found_shader_end = false;
        int shader_end_ip = 0;
+       uint32_t last_thread_switch_ip = -3;
        uint32_t ip;
        struct vc4_validated_shader_info *validated_shader = NULL;
        struct vc4_shader_validation_state validation_state;
@@ -780,6 +803,16 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                if (!vc4_handle_branch_target(&validation_state))
                        goto fail;
 
+               if (ip == last_thread_switch_ip + 3) {
+                       /* Reset r0-r3 live clamp data */
+                       int i;
+                       for (i = 64; i < LIVE_REG_COUNT; i++) {
+                               validation_state.live_min_clamp_offsets[i] = ~0;
+                               validation_state.live_max_clamp_regs[i] = false;
+                               validation_state.live_immediates[i] = ~0;
+                       }
+               }
+
                switch (sig) {
                case QPU_SIG_NONE:
                case QPU_SIG_WAIT_FOR_SCOREBOARD:
@@ -789,6 +822,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                case QPU_SIG_LOAD_TMU1:
                case QPU_SIG_PROG_END:
                case QPU_SIG_SMALL_IMM:
+               case QPU_SIG_THREAD_SWITCH:
+               case QPU_SIG_LAST_THREAD_SWITCH:
                        if (!check_instruction_writes(validated_shader,
                                                      &validation_state)) {
                                DRM_ERROR("Bad write at ip %d\n", ip);
@@ -804,6 +839,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                                shader_end_ip = ip;
                        }
 
+                       if (sig == QPU_SIG_THREAD_SWITCH ||
+                           sig == QPU_SIG_LAST_THREAD_SWITCH) {
+                               validated_shader->is_threaded = true;
+
+                               if (ip < last_thread_switch_ip + 3) {
+                                       DRM_ERROR("Thread switch too soon after "
+                                                 "last switch at ip %d\n", ip);
+                                       goto fail;
+                               }
+                               last_thread_switch_ip = ip;
+                       }
+
                        break;
 
                case QPU_SIG_LOAD_IMM:
@@ -818,6 +865,13 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                        if (!check_branch(inst, validated_shader,
                                          &validation_state, ip))
                                goto fail;
+
+                       if (ip < last_thread_switch_ip + 3) {
+                               DRM_ERROR("Branch in thread switch at ip %d",
+                                         ip);
+                               goto fail;
+                       }
+
                        break;
                default:
                        DRM_ERROR("Unsupported QPU signal %d at "
@@ -839,6 +893,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
                goto fail;
        }
 
+       /* Might corrupt other thread */
+       if (validated_shader->is_threaded &&
+           validation_state.all_registers_used) {
+               DRM_ERROR("Shader uses threading, but uses the upper "
+                         "half of the registers, too\n");
+               goto fail;
+       }
+
        /* If we did a backwards branch and we haven't emitted a uniforms
         * reset since then, we still need the uniforms stream to have the
         * uniforms address available so that the backwards branch can do its