vk: Fix indirect push constants
author: Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Thu, 21 Jan 2016 18:43:32 +0000 (10:43 -0800)
committer: Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Thu, 21 Jan 2016 19:10:11 +0000 (11:10 -0800)
This currently sets the base and size of all push constants to the
entire push constant block. The idea is that we'll use the base and size
to eventually optimize the amount we actually push, but for now we don't
do that.

src/glsl/nir/spirv/spirv_to_nir.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/vulkan/anv_nir_lower_push_constants.c

index d021122..74ad651 100644 (file)
@@ -1515,6 +1515,14 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
       instr->src[src++] = nir_src_for_ssa((*inout)->def);
    }
 
+   /* We set the base and size for push constant load to the entire push
+    * constant block for now.
+    */
+   if (op == nir_intrinsic_load_push_constant) {
+      instr->const_index[0] = 0;
+      instr->const_index[1] = 128;
+   }
+
    if (index)
       instr->src[src++] = nir_src_for_ssa(index);
 
index fc883f4..1ba5075 100644 (file)
@@ -1992,11 +1992,10 @@ fs_visitor::assign_constant_locations()
     */
    const unsigned int max_push_components = 16 * 8;
 
-   /* We push small arrays, but no bigger than 16 floats.  This is big enough
-    * for a vec4 but hopefully not large enough to push out other stuff.  We
-    * should probably use a better heuristic at some point.
+   /* For Vulkan we effectively don't limit max_chunk_size: we set it to
+    * 32 floats = 128 bytes, which is the maximum Vulkan push constant size.
     */
-   const unsigned int max_chunk_size = 16;
+   const unsigned int max_chunk_size = 32;
 
    unsigned int num_push_constants = 0;
    unsigned int num_pull_constants = 0;
index 7fc3953..53cd3d7 100644 (file)
@@ -43,45 +43,14 @@ lower_push_constants_block(nir_block *block, void *void_state)
       if (intrin->intrinsic != nir_intrinsic_load_push_constant)
          continue;
 
+      /* This won't work for vec4 stages. */
+      assert(state->is_scalar);
+
       assert(intrin->const_index[0] % 4 == 0);
-      unsigned dword_offset = intrin->const_index[0] / 4;
+      assert(intrin->const_index[1] == 128);
 
       /* We just turn them into uniform loads with the appropreate offset */
       intrin->intrinsic = nir_intrinsic_load_uniform;
-      intrin->const_index[0] = 0;
-      if (state->is_scalar) {
-         intrin->const_index[1] = dword_offset;
-      } else {
-         unsigned shift = dword_offset % 4;
-         /* Can't cross the vec4 boundary */
-         assert(shift + intrin->num_components <= 4);
-
-         /* vec4 shifts are in units of vec4's */
-         intrin->const_index[1] = dword_offset / 4;
-
-         if (shift) {
-            /* If there's a non-zero shift then we need to load a whole vec4
-             * and use a move to swizzle it into place.
-             */
-            assert(intrin->dest.is_ssa);
-            nir_alu_instr *mov = nir_alu_instr_create(state->shader,
-                                                      nir_op_imov);
-            mov->src[0].src = nir_src_for_ssa(&intrin->dest.ssa);
-            for (unsigned i = 0; i < intrin->num_components; i++)
-               mov->src[0].swizzle[i] = i + shift;
-            mov->dest.write_mask = (1 << intrin->num_components) - 1;
-            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
-                              intrin->num_components, NULL);
-
-            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                     nir_src_for_ssa(&mov->dest.dest.ssa));
-            nir_instr_insert_after(&intrin->instr, &mov->instr);
-
-            /* Stomp the number of components to 4 */
-            intrin->num_components = 4;
-            intrin->dest.ssa.num_components = 4;
-         }
-      }
    }
 
    return true;