From: Caio Marcelo de Oliveira Filho Date: Mon, 5 Oct 2020 21:46:36 +0000 (-0700) Subject: nir: Add nir_intrinsic_{load,store}_deref_block_intel X-Git-Tag: upstream/21.0.0~2930 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=dd39e311b336dddcc7343c5119ce05af410d8dc5;p=platform%2Fupstream%2Fmesa.git nir: Add nir_intrinsic_{load,store}_deref_block_intel Reviewed-by: Jason Ekstrand Part-of: --- diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index e35cf73..dc5bcaa 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -976,3 +976,27 @@ system_value("simd_width_intel", 1) # Load a relocatable 32-bit value intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32], indices=[PARAM_IDX], flags=[CAN_ELIMINATE, CAN_REORDER]) + +# OpSubgroupBlockReadINTEL and OpSubgroupBlockWriteINTEL from SPV_INTEL_subgroups. +intrinsic("load_deref_block_intel", dest_comp=0, src_comp=[-1], + indices=[ACCESS], flags=[CAN_ELIMINATE]) +intrinsic("store_deref_block_intel", src_comp=[-1, 0], indices=[WRMASK, ACCESS]) + +# src[] = { address }. +load("global_block_intel", [1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE]) + +# src[] = { buffer_index, offset }. +load("ssbo_block_intel", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE]) + +# src[] = { offset }. +load("shared_block_intel", [1], [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE]) + +# src[] = { value, address }. +store("global_block_intel", [1], [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) + +# src[] = { value, block_index, offset } +store("ssbo_block_intel", [-1, 1], [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) + +# src[] = { value, offset }. 
+store("shared_block_intel", [1], [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET]) + diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index b9352a4..54999c0 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -1250,52 +1250,78 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin, const nir_variable_mode mode = modes; nir_intrinsic_op op; - switch (mode) { - case nir_var_mem_ubo: - op = nir_intrinsic_load_ubo; - break; - case nir_var_mem_ssbo: - if (addr_format_is_global(addr_format, mode)) - op = nir_intrinsic_load_global; - else - op = nir_intrinsic_load_ssbo; - break; - case nir_var_mem_global: - assert(addr_format_is_global(addr_format, mode)); - op = nir_intrinsic_load_global; - break; - case nir_var_uniform: - assert(addr_format_is_offset(addr_format, mode)); - assert(b->shader->info.stage == MESA_SHADER_KERNEL); - op = nir_intrinsic_load_kernel_input; - break; - case nir_var_mem_shared: - assert(addr_format_is_offset(addr_format, mode)); - op = nir_intrinsic_load_shared; - break; - case nir_var_shader_temp: - case nir_var_function_temp: - if (addr_format_is_offset(addr_format, mode)) { - op = nir_intrinsic_load_scratch; - } else { + switch (intrin->intrinsic) { + case nir_intrinsic_load_deref: + switch (mode) { + case nir_var_mem_ubo: + op = nir_intrinsic_load_ubo; + break; + case nir_var_mem_ssbo: + if (addr_format_is_global(addr_format, mode)) + op = nir_intrinsic_load_global; + else + op = nir_intrinsic_load_ssbo; + break; + case nir_var_mem_global: assert(addr_format_is_global(addr_format, mode)); op = nir_intrinsic_load_global; + break; + case nir_var_uniform: + assert(addr_format_is_offset(addr_format, mode)); + assert(b->shader->info.stage == MESA_SHADER_KERNEL); + op = nir_intrinsic_load_kernel_input; + break; + case nir_var_mem_shared: + assert(addr_format_is_offset(addr_format, mode)); + op = nir_intrinsic_load_shared; + break; + case nir_var_shader_temp: + case 
nir_var_function_temp: + if (addr_format_is_offset(addr_format, mode)) { + op = nir_intrinsic_load_scratch; + } else { + assert(addr_format_is_global(addr_format, mode)); + op = nir_intrinsic_load_global; + } + break; + case nir_var_mem_push_const: + assert(addr_format == nir_address_format_32bit_offset); + op = nir_intrinsic_load_push_constant; + break; + case nir_var_mem_constant: + if (addr_format_is_offset(addr_format, mode)) { + op = nir_intrinsic_load_constant; + } else { + assert(addr_format_is_global(addr_format, mode)); + op = nir_intrinsic_load_global_constant; + } + break; + default: + unreachable("Unsupported explicit IO variable mode"); } break; - case nir_var_mem_push_const: - assert(addr_format == nir_address_format_32bit_offset); - op = nir_intrinsic_load_push_constant; - break; - case nir_var_mem_constant: - if (addr_format_is_offset(addr_format, mode)) { - op = nir_intrinsic_load_constant; - } else { - assert(addr_format_is_global(addr_format, mode)); - op = nir_intrinsic_load_global_constant; + + case nir_intrinsic_load_deref_block_intel: + switch (mode) { + case nir_var_mem_ssbo: + if (addr_format_is_global(addr_format, mode)) + op = nir_intrinsic_load_global_block_intel; + else + op = nir_intrinsic_load_ssbo_block_intel; + break; + case nir_var_mem_global: + op = nir_intrinsic_load_global_block_intel; + break; + case nir_var_mem_shared: + op = nir_intrinsic_load_shared_block_intel; + break; + default: + unreachable("Unsupported explicit IO variable mode"); } break; + default: - unreachable("Unsupported explicit IO variable mode"); + unreachable("Invalid intrinsic"); } nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); @@ -1356,6 +1382,7 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin, */ nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size); + /* TODO: Better handle block_intel. 
*/ const unsigned load_size = (bit_size / 8) * load->num_components; nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size)); @@ -1436,32 +1463,62 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin, const nir_variable_mode mode = modes; nir_intrinsic_op op; - switch (mode) { - case nir_var_mem_ssbo: - if (addr_format_is_global(addr_format, mode)) - op = nir_intrinsic_store_global; - else - op = nir_intrinsic_store_ssbo; - break; - case nir_var_mem_global: - assert(addr_format_is_global(addr_format, mode)); - op = nir_intrinsic_store_global; - break; - case nir_var_mem_shared: - assert(addr_format_is_offset(addr_format, mode)); - op = nir_intrinsic_store_shared; - break; - case nir_var_shader_temp: - case nir_var_function_temp: - if (addr_format_is_offset(addr_format, mode)) { - op = nir_intrinsic_store_scratch; - } else { + switch (intrin->intrinsic) { + case nir_intrinsic_store_deref: + assert(write_mask != 0); + + switch (mode) { + case nir_var_mem_ssbo: + if (addr_format_is_global(addr_format, mode)) + op = nir_intrinsic_store_global; + else + op = nir_intrinsic_store_ssbo; + break; + case nir_var_mem_global: assert(addr_format_is_global(addr_format, mode)); op = nir_intrinsic_store_global; + break; + case nir_var_mem_shared: + assert(addr_format_is_offset(addr_format, mode)); + op = nir_intrinsic_store_shared; + break; + case nir_var_shader_temp: + case nir_var_function_temp: + if (addr_format_is_offset(addr_format, mode)) { + op = nir_intrinsic_store_scratch; + } else { + assert(addr_format_is_global(addr_format, mode)); + op = nir_intrinsic_store_global; + } + break; + default: + unreachable("Unsupported explicit IO variable mode"); + } + break; + + case nir_intrinsic_store_deref_block_intel: + assert(write_mask == 0); + + switch (mode) { + case nir_var_mem_ssbo: + if (addr_format_is_global(addr_format, mode)) + op = nir_intrinsic_store_global_block_intel; + else + op = nir_intrinsic_store_ssbo_block_intel; + break; + case 
nir_var_mem_global: + op = nir_intrinsic_store_global_block_intel; + break; + case nir_var_mem_shared: + op = nir_intrinsic_store_shared_block_intel; + break; + default: + unreachable("Unsupported explicit IO variable mode"); } break; + default: - unreachable("Unsupported explicit IO variable mode"); + unreachable("Invalid intrinsic"); } nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); @@ -1506,6 +1563,7 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin, assert(value->bit_size % 8 == 0); if (addr_format_needs_bounds_check(addr_format)) { + /* TODO: Better handle block_intel. */ const unsigned store_size = (value->bit_size / 8) * store->num_components; nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size)); @@ -1749,6 +1807,25 @@ nir_lower_explicit_io_instr(nir_builder *b, break; } + case nir_intrinsic_load_deref_block_intel: { + nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format, + deref->modes, + align_mul, align_offset, + intrin->num_components); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value)); + break; + } + + case nir_intrinsic_store_deref_block_intel: { + assert(intrin->src[1].is_ssa); + nir_ssa_def *value = intrin->src[1].ssa; + const nir_component_mask_t write_mask = 0; + build_explicit_io_store(b, intrin, addr, addr_format, + deref->modes, align_mul, align_offset, + value, write_mask); + break; + } + default: { nir_ssa_def *value = build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes); @@ -1985,6 +2062,8 @@ nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes, switch (intrin->intrinsic) { case nir_intrinsic_load_deref: case nir_intrinsic_store_deref: + case nir_intrinsic_load_deref_block_intel: + case nir_intrinsic_store_deref_block_intel: case nir_intrinsic_deref_atomic_add: case nir_intrinsic_deref_atomic_imin: case nir_intrinsic_deref_atomic_umin: diff --git a/src/compiler/nir/nir_opt_combine_stores.c 
b/src/compiler/nir/nir_opt_combine_stores.c index e97f81d..74a2a9c 100644 --- a/src/compiler/nir/nir_opt_combine_stores.c +++ b/src/compiler/nir/nir_opt_combine_stores.c @@ -356,6 +356,21 @@ combine_stores_block(struct combine_stores_state *state, nir_block *block) break; } + case nir_intrinsic_load_deref_block_intel: + case nir_intrinsic_store_deref_block_intel: { + /* Combine all the stores that may alias with the whole variable (or + * cast). + */ + nir_deref_instr *operand = nir_src_as_deref(intrin->src[0]); + while (nir_deref_instr_parent(operand)) + operand = nir_deref_instr_parent(operand); + assert(operand->deref_type == nir_deref_type_var || + operand->deref_type == nir_deref_type_cast); + + combine_stores_with_deref(state, operand); + break; + } + case nir_intrinsic_copy_deref: case nir_intrinsic_memcpy_deref: { nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]); diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c index 6fcd16f..2c6ad6b 100644 --- a/src/compiler/nir/nir_opt_copy_prop_vars.c +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c @@ -1076,6 +1076,24 @@ copy_prop_vars_block(struct copy_prop_var_state *state, kill_aliases(copies, dst, full_mask); break; + case nir_intrinsic_store_deref_block_intel: { + if (debug) dump_instr(instr); + + /* Invalidate the whole variable (or cast) and anything that aliases + * with it. + */ + nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]); + while (nir_deref_instr_parent(dst)) + dst = nir_deref_instr_parent(dst); + assert(dst->deref_type == nir_deref_type_var || + dst->deref_type == nir_deref_type_cast); + + unsigned num_components = glsl_get_vector_elements(dst->type); + unsigned full_mask = (1 << num_components) - 1; + kill_aliases(copies, dst, full_mask); + break; + } + default: continue; /* To skip the debug below. */ }