radeonsi: Process multiple patches per threadgroup.

author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

Mon, 2 May 2016 13:00:21 +0000 (15:00 +0200)

committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

Thu, 26 May 2016 20:07:04 +0000 (22:07 +0200)
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 2 May 2016 13:00:21 +0000 (15:00 +0200)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Thu, 26 May 2016 20:07:04 +0000 (22:07 +0200)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c

index 6fe2619..c8b87a9 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -108,20 +108,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
         unsigned input_patch_size, output_patch_size, output_patch0_offset;
         unsigned perpatch_output_offset, lds_size, ls_rsrc2;
         unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
-       unsigned offchip_layout;
-
-       *num_patches = 1; /* TODO: calculate this */
-
-       if (sctx->last_ls == ls->current &&
-           sctx->last_tcs == tcs &&
-           sctx->last_tes_sh_base == tes_sh_base &&
-           sctx->last_num_tcs_input_cp == num_tcs_input_cp)
-               return;
-
-       sctx->last_ls = ls->current;
-       sctx->last_tcs = tcs;
-       sctx->last_tes_sh_base = tes_sh_base;
-       sctx->last_num_tcs_input_cp = num_tcs_input_cp;
+       unsigned offchip_layout, hardware_lds_size;
  
         /* This calculates how shader inputs and outputs among VS, TCS, and TES
          * are laid out in LDS. */
@@ -146,7 +133,29 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
         pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
         output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
  
-       output_patch0_offset = sctx->tcs_shader.cso ? input_patch_size * *num_patches : 0;
+       /* Ensure that we only need one wave per SIMD so we don't need to check
+        * resource usage. Also ensures that the number of TCS input and output
+        * vertices per threadgroup is at most 256.
+        */
+       *num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4;
+
+       /* Make sure that the data fits in LDS. This assumes the shaders only
+        * use LDS for the inputs and outputs.
+        */
+       hardware_lds_size = sctx->b.chip_class >= CIK ? 65536 : 32768;
+       *num_patches = MIN2(*num_patches, hardware_lds_size / (input_patch_size +
+                                                              output_patch_size));
+
+       /* Make sure the output data fits in the offchip buffer */
+       *num_patches = MIN2(*num_patches, SI_TESS_OFFCHIP_BLOCK_SIZE /
+                                         output_patch_size);
+
+       /* Not necessary for correctness, but improves performance. The
+        * specific value is taken from the proprietary driver.
+        */
+       *num_patches = MIN2(*num_patches, 40);
+
+       output_patch0_offset = input_patch_size * *num_patches;
         perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
  
         lds_size = output_patch0_offset + output_patch_size * *num_patches;
@@ -160,6 +169,17 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
                 ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256);
         }
  
+       if (sctx->last_ls == ls->current &&
+           sctx->last_tcs == tcs &&
+           sctx->last_tes_sh_base == tes_sh_base &&
+           sctx->last_num_tcs_input_cp == num_tcs_input_cp)
+               return;
+
+       sctx->last_ls = ls->current;
+       sctx->last_tcs = tcs;
+       sctx->last_tes_sh_base = tes_sh_base;
+       sctx->last_num_tcs_input_cp = num_tcs_input_cp;
+
         /* Due to a hw bug, RSRC2_LS must be written twice with another
          * LS register written in between. */
         if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
author	Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
	Mon, 2 May 2016 13:00:21 +0000 (15:00 +0200)
committer	Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
	Thu, 26 May 2016 20:07:04 +0000 (22:07 +0200)