intel/compiler: Keep track of compiled/spilled in brw_simd_selection_state
author    Caio Oliveira <caio.oliveira@intel.com>
Tue, 8 Nov 2022 11:38:18 +0000 (03:38 -0800)
committer Marge Bot <emma+marge@anholt.net>
Tue, 15 Nov 2022 04:55:18 +0000 (04:55 +0000)
We still update the cs_prog_data, but don't rely on it for this state anymore.
This will allow using the SIMD selector with shaders that don't use cs_prog_data.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19601>
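
The new helpers make the caller-side pattern explicit. Below is a minimal sketch of that pattern, modeled on the brw_compile_cs/brw_compile_task loops in the diff that follows; compile_variant() is a hypothetical stand-in for the backend's v[simd]->run_cs()/run_task() calls and is not a real Mesa function.

   #include "brw_private.h"

   /* Hypothetical backend entry point standing in for v[simd]->run_cs()/run_task(). */
   bool compile_variant(unsigned simd, bool allow_spilling, bool *spilled);

   static int
   pick_simd(brw_simd_selection_state &simd_state)
   {
      for (unsigned simd = 0; simd < SIMD_COUNT; simd++) {
         if (!brw_simd_should_compile(simd_state, simd))
            continue;

         /* Only allow spilling while no narrower variant has compiled yet,
          * mirroring the allow_spilling logic in the hunks below.
          * brw_compile_cs additionally uses brw_simd_first_compiled() at this
          * point to import uniforms from the first successful variant.
          */
         const bool allow_spilling = !brw_simd_any_compiled(simd_state);

         bool spilled = false;
         if (compile_variant(simd, allow_spilling, &spilled))
            brw_simd_mark_compiled(simd_state, simd, spilled);
      }

      /* Widest compiled variant that did not spill; -1 if nothing compiled. */
      return brw_simd_select(simd_state);
   }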

src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_mesh.cpp
src/intel/compiler/brw_private.h
src/intel/compiler/brw_simd_selection.cpp
src/intel/compiler/test_simd_selection.cpp

index c661a8c..0d35245 100644 (file)
@@ -7838,13 +7838,11 @@ brw_compile_cs(const struct brw_compiler *compiler,
                                     &prog_data->base, shader, dispatch_width,
                                              debug_enabled);
 
-      if (prog_data->prog_mask) {
-         unsigned first = ffs(prog_data->prog_mask) - 1;
+      const int first = brw_simd_first_compiled(simd_state);
+      if (first >= 0)
          v[simd]->import_uniforms(v[first].get());
-      }
 
-      const bool allow_spilling = !prog_data->prog_mask ||
-                                  nir->info.workgroup_size_variable;
+      const bool allow_spilling = first < 0 || nir->info.workgroup_size_variable;
 
       if (v[simd]->run_cs(allow_spilling)) {
          cs_fill_push_const_info(compiler->devinfo, prog_data);
index 64bcb52..2250771 100644 (file)
@@ -300,8 +300,7 @@ brw_compile_task(const struct brw_compiler *compiler,
          v[simd]->import_uniforms(v[first].get());
       }
 
-      const bool allow_spilling = !prog_data->base.prog_mask;
-
+      const bool allow_spilling = !brw_simd_any_compiled(simd_state);
       if (v[simd]->run_task(allow_spilling))
          brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
       else
@@ -811,8 +810,7 @@ brw_compile_mesh(const struct brw_compiler *compiler,
          v[simd]->import_uniforms(v[first].get());
       }
 
-      const bool allow_spilling = !prog_data->base.prog_mask;
-
+      const bool allow_spilling = !brw_simd_any_compiled(simd_state);
       if (v[simd]->run_mesh(allow_spilling))
          brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
       else
index 8c40d3c..b9dd686 100644 (file)
@@ -32,7 +32,6 @@ unsigned brw_required_dispatch_width(const struct shader_info *info);
 static constexpr int SIMD_COUNT = 3;
 
 struct brw_simd_selection_state {
-
    void *mem_ctx;
    const struct intel_device_info *devinfo;
 
@@ -41,8 +40,25 @@ struct brw_simd_selection_state {
    unsigned required_width;
 
    const char *error[SIMD_COUNT];
+
+   bool compiled[SIMD_COUNT];
+   bool spilled[SIMD_COUNT];
 };
 
+inline int brw_simd_first_compiled(const brw_simd_selection_state &state)
+{
+   for (int i = 0; i < SIMD_COUNT; i++) {
+      if (state.compiled[i])
+         return i;
+   }
+   return -1;
+}
+
+inline bool brw_simd_any_compiled(const brw_simd_selection_state &state)
+{
+   return brw_simd_first_compiled(state) >= 0;
+}
+
 bool brw_simd_should_compile(brw_simd_selection_state &state, unsigned simd);
 
 void brw_simd_mark_compiled(brw_simd_selection_state &state, unsigned simd, bool spilled);
index 726e9a9..68e0cf4 100644 (file)
@@ -47,14 +47,12 @@ test_bit(unsigned mask, unsigned bit) {
 }
 
 bool
-brw_simd_should_compile(brw_simd_selection_state &state,
-                        unsigned simd)
+brw_simd_should_compile(brw_simd_selection_state &state, unsigned simd)
 {
    assert(simd < SIMD_COUNT);
+   assert(!state.compiled[simd]);
 
    struct brw_cs_prog_data *prog_data = state.prog_data;
-   assert(!test_bit(prog_data->prog_mask, simd));
-
    const unsigned width = 8u << simd;
 
    /* For shaders with variable size workgroup, in most cases we can compile
@@ -64,7 +62,7 @@ brw_simd_should_compile(brw_simd_selection_state &state,
    const bool workgroup_size_variable = prog_data->local_size[0] == 0;
 
    if (!workgroup_size_variable) {
-      if (test_bit(prog_data->prog_spilled, simd)) {
+      if (state.spilled[simd]) {
          state.error[simd] = ralloc_asprintf(
             state.mem_ctx, "SIMD%u skipped because would spill", width);
          return false;
@@ -83,7 +81,7 @@ brw_simd_should_compile(brw_simd_selection_state &state,
          return false;
       }
 
-      if (simd > 0 && test_bit(prog_data->prog_mask, simd - 1) &&
+      if (simd > 0 && state.compiled[simd - 1] &&
           workgroup_size <= (width / 2)) {
          state.error[simd] = ralloc_asprintf(
             state.mem_ctx, "SIMD%u skipped because workgroup size %u already fits in SIMD%u",
@@ -103,7 +101,7 @@ brw_simd_should_compile(brw_simd_selection_state &state,
        * TODO: Use performance_analysis and drop this rule.
        */
       if (width == 32) {
-         if (!INTEL_DEBUG(DEBUG_DO32) && prog_data->prog_mask) {
+         if (!INTEL_DEBUG(DEBUG_DO32) && (state.compiled[0] || state.compiled[1])) {
             state.error[simd] = ralloc_strdup(
                state.mem_ctx, "SIMD32 skipped because not required");
             return false;
@@ -147,35 +145,32 @@ void
 brw_simd_mark_compiled(brw_simd_selection_state &state, unsigned simd, bool spilled)
 {
    assert(simd < SIMD_COUNT);
+   assert(!state.compiled[simd]);
 
-   struct brw_cs_prog_data *prog_data = state.prog_data;
-   assert(!test_bit(prog_data->prog_mask, simd));
-
-   prog_data->prog_mask |= 1u << simd;
+   state.compiled[simd] = true;
+   state.prog_data->prog_mask |= 1u << simd;
 
    /* If a SIMD spilled, all the larger ones would spill too. */
    if (spilled) {
-      for (unsigned i = simd; i < SIMD_COUNT; i++)
-         prog_data->prog_spilled |= 1u << i;
+      for (unsigned i = simd; i < SIMD_COUNT; i++) {
+         state.spilled[i] = true;
+         state.prog_data->prog_spilled |= 1u << i;
+      }
    }
 }
 
 int
 brw_simd_select(const struct brw_simd_selection_state &state)
 {
-   const struct brw_cs_prog_data *prog_data = state.prog_data;
-   assert((prog_data->prog_mask & ~0x7u) == 0);
-   const unsigned not_spilled_mask =
-      prog_data->prog_mask & ~prog_data->prog_spilled;
-
-   /* Util functions index bits from 1 instead of 0, adjust before return. */
-
-   if (not_spilled_mask)
-      return util_last_bit(not_spilled_mask) - 1;
-   else if (prog_data->prog_mask)
-      return ffs(prog_data->prog_mask) - 1;
-   else
-      return -1;
+   for (int i = SIMD_COUNT - 1; i >= 0; i--) {
+      if (state.compiled[i] && !state.spilled[i])
+         return i;
+   }
+   for (int i = SIMD_COUNT - 1; i >= 0; i--) {
+      if (state.compiled[i])
+         return i;
+   }
+   return -1;
 }
 
 int
@@ -186,9 +181,18 @@ brw_simd_select_for_workgroup_size(const struct intel_device_info *devinfo,
    if (!sizes || (prog_data->local_size[0] == sizes[0] &&
                   prog_data->local_size[1] == sizes[1] &&
                   prog_data->local_size[2] == sizes[2])) {
-      const brw_simd_selection_state simd_state{
+      brw_simd_selection_state simd_state{
          .prog_data = const_cast<struct brw_cs_prog_data *>(prog_data),
       };
+
+      /* Propagate the prog_data information back to the simd_state,
+       * so we can use select() directly.
+       */
+      for (int i = 0; i < SIMD_COUNT; i++) {
+         simd_state.compiled[i] = test_bit(prog_data->prog_mask, i);
+         simd_state.spilled[i] = test_bit(prog_data->prog_spilled, i);
+      }
+
       return brw_simd_select(simd_state);
    }
 
index c8a6f02..079e2a9 100644 (file)
@@ -366,3 +366,33 @@ TEST_F(SIMDSelectionCS, Require32ErrorWhenNotCompile)
 
    ASSERT_EQ(brw_simd_select(simd_state), -1);
 }
+
+TEST_F(SIMDSelectionCS, FirstCompiledIsSIMD8)
+{
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD8));
+   brw_simd_mark_compiled(simd_state, SIMD8, not_spilled);
+
+   ASSERT_TRUE(brw_simd_any_compiled(simd_state));
+   ASSERT_EQ(brw_simd_first_compiled(simd_state), SIMD8);
+}
+
+TEST_F(SIMDSelectionCS, FirstCompiledIsSIMD16)
+{
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD8));
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD16));
+   brw_simd_mark_compiled(simd_state, SIMD16, not_spilled);
+
+   ASSERT_TRUE(brw_simd_any_compiled(simd_state));
+   ASSERT_EQ(brw_simd_first_compiled(simd_state), SIMD16);
+}
+
+TEST_F(SIMDSelectionCS, FirstCompiledIsSIMD32)
+{
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD8));
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD16));
+   ASSERT_TRUE(brw_simd_should_compile(simd_state, SIMD32));
+   brw_simd_mark_compiled(simd_state, SIMD32, not_spilled);
+
+   ASSERT_TRUE(brw_simd_any_compiled(simd_state));
+   ASSERT_EQ(brw_simd_first_compiled(simd_state), SIMD32);
+}