i965/fs: force pull model for 64-bit GS inputs

author Iago Toral Quiroga <itoral@igalia.com>

Wed, 27 Sep 2017 09:36:31 +0000 (11:36 +0200)

committer Iago Toral Quiroga <itoral@igalia.com>

Fri, 29 Sep 2017 06:18:25 +0000 (08:18 +0200)
author Iago Toral Quiroga <itoral@igalia.com>
Wed, 27 Sep 2017 09:36:31 +0000 (11:36 +0200)
committer Iago Toral Quiroga <itoral@igalia.com>
Fri, 29 Sep 2017 06:18:25 +0000 (08:18 +0200)
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp

index eb9b4c3..e33cb0e 100644 (file)
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5602,6 +5602,17 @@ fs_visitor::setup_gs_payload()
        payload.num_regs++;
     }
  
+   /* Always enable VUE handles so we can safely use pull model if needed.
+    *
+    * The push model for a GS uses a ton of register space even for trivial
+    * scenarios with just a few inputs, so just make things easier and a bit
+    * safer by always having pull model available.
+    */
+   gs_prog_data->base.include_vue_handles = true;
+
+   /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
+   payload.num_regs += nir->info.gs.vertices_in;
+
     /* Use a maximum of 24 registers for push-model inputs. */
     const unsigned max_push_components = 24;
  
@@ -5612,12 +5623,7 @@ fs_visitor::setup_gs_payload()
      * have to multiply by VerticesIn to obtain the total storage requirement.
      */
     if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
-       max_push_components || gs_prog_data->invocations > 1) {
-      gs_prog_data->base.include_vue_handles = true;
-
-      /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
-      payload.num_regs += nir->info.gs.vertices_in;
-
+       max_push_components) {
        vue_prog_data->urb_read_length =
           ROUND_DOWN_TO(max_push_components / nir->info.gs.vertices_in, 8) / 8;
     }
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp

index d760946..aa57bb9 100644 (file)
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1915,7 +1915,9 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
     const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8;
  
     /* TODO: figure out push input layout for invocations == 1 */
+   /* TODO: make this work with 64-bit inputs */
     if (gs_prog_data->invocations == 1 &&
+       type_sz(dst.type) <= 4 &&
         offset_const != NULL && vertex_const != NULL &&
         4 * (base_offset + offset_const->u32[0]) < push_reg_count) {
        int imm_offset = (base_offset + offset_const->u32[0]) * 4 +
@@ -1928,7 +1930,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
     }
  
     /* Resort to the pull model.  Ensure the VUE handles are provided. */
-   gs_prog_data->base.include_vue_handles = true;
+   assert(gs_prog_data->base.include_vue_handles);
  
     unsigned first_icp_handle = gs_prog_data->include_primitive_id ? 3 : 2;
     fs_reg icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
author	Iago Toral Quiroga <itoral@igalia.com>
	Wed, 27 Sep 2017 09:36:31 +0000 (11:36 +0200)
committer	Iago Toral Quiroga <itoral@igalia.com>
	Fri, 29 Sep 2017 06:18:25 +0000 (08:18 +0200)
src/intel/compiler/brw_fs.cpp		patch | blob | history
src/intel/compiler/brw_fs_nir.cpp		patch | blob | history