radeonsi: Enable dynamic HS.
authorBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 2 May 2016 12:59:43 +0000 (14:59 +0200)
committerBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Thu, 26 May 2016 20:07:04 +0000 (22:07 +0200)
This allows running the TES on different CU's than the
TCS which results in performance improvements.

v2: Only write the control word from one invocation.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index 11c7c38..166b2e8 100644 (file)
@@ -2532,7 +2532,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
        LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
        LLVMValueRef out[6], vec0, vec1, rw_buffers, tf_base;
        unsigned stride, outer_comps, inner_comps, i;
-       struct lp_build_if_state if_ctx;
+       struct lp_build_if_state if_ctx, inner_if_ctx;
 
        /* Do this only for invocation 0, because the tess levels are per-patch,
         * not per-vertex.
@@ -2604,12 +2604,23 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
        byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
                                  lp_build_const_int32(gallivm, 4 * stride), "");
 
-       /* Store the outputs. */
+       lp_build_if(&inner_if_ctx, gallivm,
+                   LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
+                                 rel_patch_id, bld_base->uint_bld.zero, ""));
+
+       /* Store the dynamic HS control word. */
+       build_tbuffer_store_dwords(ctx, buffer,
+                                  lp_build_const_int32(gallivm, 0x80000000),
+                                  1, lp_build_const_int32(gallivm, 0), tf_base, 0);
+
+       lp_build_endif(&inner_if_ctx);
+
+       /* Store the tessellation factors. */
        build_tbuffer_store_dwords(ctx, buffer, vec0,
-                                  MIN2(stride, 4), byteoffset, tf_base, 0);
+                                  MIN2(stride, 4), byteoffset, tf_base, 4);
        if (vec1)
                build_tbuffer_store_dwords(ctx, buffer, vec1,
-                                          stride - 4, byteoffset, tf_base, 16);
+                                          stride - 4, byteoffset, tf_base, 20);
        lp_build_endif(&if_ctx);
 }
 
index 2aecfa3..116bf27 100644 (file)
@@ -1882,7 +1882,7 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
 
                if (sctx->tes_shader.cso) {
                        stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
-                                 S_028B54_HS_EN(1);
+                                 S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
 
                        if (sctx->gs_shader.cso)
                                stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |