nir: Add nir_intrinsic_{load,store}_deref_block_intel
author Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Mon, 5 Oct 2020 21:46:36 +0000 (14:46 -0700)
committer Marge Bot <eric+marge@anholt.net>
Wed, 4 Nov 2020 20:24:48 +0000 (20:24 +0000)
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7448>

src/compiler/nir/nir_intrinsics.py
src/compiler/nir/nir_lower_io.c
src/compiler/nir/nir_opt_combine_stores.c
src/compiler/nir/nir_opt_copy_prop_vars.c

index e35cf73..dc5bcaa 100644 (file)
@@ -976,3 +976,27 @@ system_value("simd_width_intel", 1)
 # Load a relocatable 32-bit value
 intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32],
           indices=[PARAM_IDX], flags=[CAN_ELIMINATE, CAN_REORDER])
+
+# OpSubgroupBlockReadINTEL and OpSubgroupBlockWriteINTEL from SPV_INTEL_subgroups.
+intrinsic("load_deref_block_intel", dest_comp=0, src_comp=[-1],
+          indices=[ACCESS], flags=[CAN_ELIMINATE])
+intrinsic("store_deref_block_intel", src_comp=[-1, 0], indices=[WRMASK, ACCESS])
+
+# src[] = { address }.
+load("global_block_intel", [1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
+
+# src[] = { buffer_index, offset }.
+load("ssbo_block_intel", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
+
+# src[] = { offset }.
+load("shared_block_intel", [1], [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
+
+# src[] = { value, address }.
+store("global_block_intel", [1], [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
+
+# src[] = { value, buffer_index, offset }.
+store("ssbo_block_intel", [-1, 1], [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
+
+# src[] = { value, offset }.
+store("shared_block_intel", [1], [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
+
index b9352a4..54999c0 100644 (file)
@@ -1250,52 +1250,78 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
    const nir_variable_mode mode = modes;
 
    nir_intrinsic_op op;
-   switch (mode) {
-   case nir_var_mem_ubo:
-      op = nir_intrinsic_load_ubo;
-      break;
-   case nir_var_mem_ssbo:
-      if (addr_format_is_global(addr_format, mode))
-         op = nir_intrinsic_load_global;
-      else
-         op = nir_intrinsic_load_ssbo;
-      break;
-   case nir_var_mem_global:
-      assert(addr_format_is_global(addr_format, mode));
-      op = nir_intrinsic_load_global;
-      break;
-   case nir_var_uniform:
-      assert(addr_format_is_offset(addr_format, mode));
-      assert(b->shader->info.stage == MESA_SHADER_KERNEL);
-      op = nir_intrinsic_load_kernel_input;
-      break;
-   case nir_var_mem_shared:
-      assert(addr_format_is_offset(addr_format, mode));
-      op = nir_intrinsic_load_shared;
-      break;
-   case nir_var_shader_temp:
-   case nir_var_function_temp:
-      if (addr_format_is_offset(addr_format, mode)) {
-         op = nir_intrinsic_load_scratch;
-      } else {
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_load_deref:
+      switch (mode) {
+      case nir_var_mem_ubo:
+         op = nir_intrinsic_load_ubo;
+         break;
+      case nir_var_mem_ssbo:
+         if (addr_format_is_global(addr_format, mode))
+            op = nir_intrinsic_load_global;
+         else
+            op = nir_intrinsic_load_ssbo;
+         break;
+      case nir_var_mem_global:
          assert(addr_format_is_global(addr_format, mode));
          op = nir_intrinsic_load_global;
+         break;
+      case nir_var_uniform:
+         assert(addr_format_is_offset(addr_format, mode));
+         assert(b->shader->info.stage == MESA_SHADER_KERNEL);
+         op = nir_intrinsic_load_kernel_input;
+         break;
+      case nir_var_mem_shared:
+         assert(addr_format_is_offset(addr_format, mode));
+         op = nir_intrinsic_load_shared;
+         break;
+      case nir_var_shader_temp:
+      case nir_var_function_temp:
+         if (addr_format_is_offset(addr_format, mode)) {
+            op = nir_intrinsic_load_scratch;
+         } else {
+            assert(addr_format_is_global(addr_format, mode));
+            op = nir_intrinsic_load_global;
+         }
+         break;
+      case nir_var_mem_push_const:
+         assert(addr_format == nir_address_format_32bit_offset);
+         op = nir_intrinsic_load_push_constant;
+         break;
+      case nir_var_mem_constant:
+         if (addr_format_is_offset(addr_format, mode)) {
+            op = nir_intrinsic_load_constant;
+         } else {
+            assert(addr_format_is_global(addr_format, mode));
+            op = nir_intrinsic_load_global_constant;
+         }
+         break;
+      default:
+         unreachable("Unsupported explicit IO variable mode");
       }
       break;
-   case nir_var_mem_push_const:
-      assert(addr_format == nir_address_format_32bit_offset);
-      op = nir_intrinsic_load_push_constant;
-      break;
-   case nir_var_mem_constant:
-      if (addr_format_is_offset(addr_format, mode)) {
-         op = nir_intrinsic_load_constant;
-      } else {
-         assert(addr_format_is_global(addr_format, mode));
-         op = nir_intrinsic_load_global_constant;
+
+   case nir_intrinsic_load_deref_block_intel:
+      switch (mode) {
+      case nir_var_mem_ssbo:
+         if (addr_format_is_global(addr_format, mode))
+            op = nir_intrinsic_load_global_block_intel;
+         else
+            op = nir_intrinsic_load_ssbo_block_intel;
+         break;
+      case nir_var_mem_global:
+         op = nir_intrinsic_load_global_block_intel;
+         break;
+      case nir_var_mem_shared:
+         op = nir_intrinsic_load_shared_block_intel;
+         break;
+      default:
+         unreachable("Unsupported explicit IO variable mode");
       }
       break;
+
    default:
-      unreachable("Unsupported explicit IO variable mode");
+      unreachable("Invalid intrinsic");
    }
 
    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
@@ -1356,6 +1382,7 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
        */
       nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
 
+      /* TODO: Better handle block_intel. */
       const unsigned load_size = (bit_size / 8) * load->num_components;
       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
 
@@ -1436,32 +1463,62 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
    const nir_variable_mode mode = modes;
 
    nir_intrinsic_op op;
-   switch (mode) {
-   case nir_var_mem_ssbo:
-      if (addr_format_is_global(addr_format, mode))
-         op = nir_intrinsic_store_global;
-      else
-         op = nir_intrinsic_store_ssbo;
-      break;
-   case nir_var_mem_global:
-      assert(addr_format_is_global(addr_format, mode));
-      op = nir_intrinsic_store_global;
-      break;
-   case nir_var_mem_shared:
-      assert(addr_format_is_offset(addr_format, mode));
-      op = nir_intrinsic_store_shared;
-      break;
-   case nir_var_shader_temp:
-   case nir_var_function_temp:
-      if (addr_format_is_offset(addr_format, mode)) {
-         op = nir_intrinsic_store_scratch;
-      } else {
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_store_deref:
+      assert(write_mask != 0);
+
+      switch (mode) {
+      case nir_var_mem_ssbo:
+         if (addr_format_is_global(addr_format, mode))
+            op = nir_intrinsic_store_global;
+         else
+            op = nir_intrinsic_store_ssbo;
+         break;
+      case nir_var_mem_global:
          assert(addr_format_is_global(addr_format, mode));
          op = nir_intrinsic_store_global;
+         break;
+      case nir_var_mem_shared:
+         assert(addr_format_is_offset(addr_format, mode));
+         op = nir_intrinsic_store_shared;
+         break;
+      case nir_var_shader_temp:
+      case nir_var_function_temp:
+         if (addr_format_is_offset(addr_format, mode)) {
+            op = nir_intrinsic_store_scratch;
+         } else {
+            assert(addr_format_is_global(addr_format, mode));
+            op = nir_intrinsic_store_global;
+         }
+         break;
+      default:
+         unreachable("Unsupported explicit IO variable mode");
+      }
+      break;
+
+   case nir_intrinsic_store_deref_block_intel:
+      assert(write_mask == 0);
+
+      switch (mode) {
+      case nir_var_mem_ssbo:
+         if (addr_format_is_global(addr_format, mode))
+            op = nir_intrinsic_store_global_block_intel;
+         else
+            op = nir_intrinsic_store_ssbo_block_intel;
+         break;
+      case nir_var_mem_global:
+         op = nir_intrinsic_store_global_block_intel;
+         break;
+      case nir_var_mem_shared:
+         op = nir_intrinsic_store_shared_block_intel;
+         break;
+      default:
+         unreachable("Unsupported explicit IO variable mode");
       }
       break;
+
    default:
-      unreachable("Unsupported explicit IO variable mode");
+      unreachable("Invalid intrinsic");
    }
 
    nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
@@ -1506,6 +1563,7 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
    assert(value->bit_size % 8 == 0);
 
    if (addr_format_needs_bounds_check(addr_format)) {
+      /* TODO: Better handle block_intel. */
       const unsigned store_size = (value->bit_size / 8) * store->num_components;
       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
 
@@ -1749,6 +1807,25 @@ nir_lower_explicit_io_instr(nir_builder *b,
       break;
    }
 
+   case nir_intrinsic_load_deref_block_intel: {
+      nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
+                                                  deref->modes,
+                                                  align_mul, align_offset,
+                                                  intrin->num_components);
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
+      break;
+   }
+
+   case nir_intrinsic_store_deref_block_intel: {
+      assert(intrin->src[1].is_ssa);
+      nir_ssa_def *value = intrin->src[1].ssa;
+      const nir_component_mask_t write_mask = 0;
+      build_explicit_io_store(b, intrin, addr, addr_format,
+                              deref->modes, align_mul, align_offset,
+                              value, write_mask);
+      break;
+   }
+
    default: {
       nir_ssa_def *value =
          build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
@@ -1985,6 +2062,8 @@ nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
             switch (intrin->intrinsic) {
             case nir_intrinsic_load_deref:
             case nir_intrinsic_store_deref:
+            case nir_intrinsic_load_deref_block_intel:
+            case nir_intrinsic_store_deref_block_intel:
             case nir_intrinsic_deref_atomic_add:
             case nir_intrinsic_deref_atomic_imin:
             case nir_intrinsic_deref_atomic_umin:
index e97f81d..74a2a9c 100644 (file)
@@ -356,6 +356,21 @@ combine_stores_block(struct combine_stores_state *state, nir_block *block)
          break;
       }
 
+      case nir_intrinsic_load_deref_block_intel:
+      case nir_intrinsic_store_deref_block_intel: {
+         /* Combine all the stores that may alias with the whole variable (or
+          * cast).
+          */
+         nir_deref_instr *operand = nir_src_as_deref(intrin->src[0]);
+         while (nir_deref_instr_parent(operand))
+            operand = nir_deref_instr_parent(operand);
+         assert(operand->deref_type == nir_deref_type_var ||
+                operand->deref_type == nir_deref_type_cast);
+
+         combine_stores_with_deref(state, operand);
+         break;
+      }
+
       case nir_intrinsic_copy_deref:
       case nir_intrinsic_memcpy_deref: {
          nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
index 6fcd16f..2c6ad6b 100644 (file)
@@ -1076,6 +1076,24 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
          kill_aliases(copies, dst, full_mask);
          break;
 
+      case nir_intrinsic_store_deref_block_intel: {
+         if (debug) dump_instr(instr);
+
+         /* Invalidate the whole variable (or cast) and anything that aliases
+          * with it.
+          */
+         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+         while (nir_deref_instr_parent(dst))
+            dst = nir_deref_instr_parent(dst);
+         assert(dst->deref_type == nir_deref_type_var ||
+                dst->deref_type == nir_deref_type_cast);
+
+         unsigned num_components = glsl_get_vector_elements(dst->type);
+         unsigned full_mask = (1 << num_components) - 1;
+         kill_aliases(copies, dst, full_mask);
+         break;
+      }
+
       default:
          continue; /* To skip the debug below. */
       }