From ea6993f9c76ec8b2cdfbd75914a614a55b2936c4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 19 Jul 2022 01:23:44 -0400 Subject: [PATCH] nir: add nir_intrinsic_image_samples_identical radeonsi will use it Reviewed-by: Jason Ekstrand Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/llvm/ac_nir_to_llvm.c | 1 + src/compiler/glsl/gl_nir_lower_images.c | 1 + src/compiler/glsl/gl_nir_lower_samplers_as_deref.c | 1 + src/compiler/nir/nir_divergence_analysis.c | 7 +++++++ src/compiler/nir/nir_group_loads.c | 3 +++ src/compiler/nir/nir_intrinsics.py | 2 ++ src/compiler/nir/nir_lower_non_uniform_access.c | 3 +++ src/compiler/nir/nir_opt_access.c | 2 ++ src/compiler/nir/nir_opt_preamble.c | 1 + 9 files changed, 21 insertions(+) diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index b7a1292..47ee87c 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3844,6 +3844,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins break; case nir_intrinsic_image_deref_load: case nir_intrinsic_image_deref_sparse_load: + case nir_intrinsic_image_deref_samples_identical: result = visit_image_load(ctx, instr, false); break; case nir_intrinsic_bindless_image_store: diff --git a/src/compiler/glsl/gl_nir_lower_images.c b/src/compiler/glsl/gl_nir_lower_images.c index fde9960..09b0094 100644 --- a/src/compiler/glsl/gl_nir_lower_images.c +++ b/src/compiler/glsl/gl_nir_lower_images.c @@ -78,6 +78,7 @@ lower_impl(nir_builder *b, nir_instr *instr, bool bindless_only) case nir_intrinsic_image_deref_load: case nir_intrinsic_image_deref_samples: case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_deref_samples_identical: case nir_intrinsic_image_deref_store: { deref = nir_src_as_deref(intrinsic->src[0]); var = nir_deref_instr_get_variable(deref); diff --git a/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c b/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c index a6f72d7..d4a8087 100644 --- a/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c +++ b/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c @@ -328,6 +328,7 @@ lower_intrinsic(nir_intrinsic_instr *instr, instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap || instr->intrinsic == nir_intrinsic_image_deref_atomic_fadd || instr->intrinsic == nir_intrinsic_image_deref_size || + instr->intrinsic == nir_intrinsic_image_deref_samples_identical || instr->intrinsic == nir_intrinsic_image_deref_samples) { b->cursor = nir_before_instr(&instr->instr); diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 250c217..072a83e 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -323,6 +323,13 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM); break; + case nir_intrinsic_image_samples_identical: + case nir_intrinsic_image_deref_samples_identical: + case nir_intrinsic_bindless_image_samples_identical: + is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) || + instr->src[1].ssa->divergent; + break; + case nir_intrinsic_image_load: case nir_intrinsic_image_deref_load: case nir_intrinsic_bindless_image_load: diff --git a/src/compiler/nir/nir_group_loads.c b/src/compiler/nir/nir_group_loads.c index b9da5c3..b45f9f3 100644 --- a/src/compiler/nir/nir_group_loads.c +++ b/src/compiler/nir/nir_group_loads.c @@ -83,6 +83,9 @@ get_intrinsic_resource(nir_intrinsic_instr *intr) case nir_intrinsic_image_sparse_load: case nir_intrinsic_image_deref_sparse_load: /* Group image_size too because it has the same latency as cache hits. */ + case nir_intrinsic_image_samples_identical: + case nir_intrinsic_image_deref_samples_identical: + case nir_intrinsic_bindless_image_samples_identical: case nir_intrinsic_image_size: case nir_intrinsic_image_deref_size: case nir_intrinsic_bindless_image_load: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 36b2136..6fb7dbd 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -649,6 +649,8 @@ image("size", dest_comp=0, src_comp=[1], flags=[CAN_ELIMINATE, CAN_REORDER]) image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER]) image("atomic_inc_wrap", src_comp=[4, 1, 1], dest_comp=1) image("atomic_dec_wrap", src_comp=[4, 1, 1], dest_comp=1) +# This returns true if all samples within the pixel have equal color values. +image("samples_identical", dest_comp=1, src_comp=[4], flags=[CAN_ELIMINATE]) # CL-specific format queries image("format", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER]) image("order", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER]) diff --git a/src/compiler/nir/nir_lower_non_uniform_access.c b/src/compiler/nir/nir_lower_non_uniform_access.c index fbdd372..02ad2a6 100644 --- a/src/compiler/nir/nir_lower_non_uniform_access.c +++ b/src/compiler/nir/nir_lower_non_uniform_access.c @@ -272,6 +272,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl, case nir_intrinsic_image_atomic_fmax: case nir_intrinsic_image_size: case nir_intrinsic_image_samples: + case nir_intrinsic_image_samples_identical: case nir_intrinsic_bindless_image_load: case nir_intrinsic_bindless_image_sparse_load: case nir_intrinsic_bindless_image_store: @@ -290,6 +291,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl, case nir_intrinsic_bindless_image_atomic_fmax: case nir_intrinsic_bindless_image_size: case nir_intrinsic_bindless_image_samples: + case nir_intrinsic_bindless_image_samples_identical: case nir_intrinsic_image_deref_load: case nir_intrinsic_image_deref_sparse_load: case nir_intrinsic_image_deref_store: @@ -308,6 +310,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl, case nir_intrinsic_image_deref_atomic_fmax: case nir_intrinsic_image_deref_size: case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_samples_identical: if ((options->types & nir_lower_non_uniform_image_access) && lower_non_uniform_access_intrin(options, &b, intrin, 0)) progress = true; diff --git a/src/compiler/nir/nir_opt_access.c b/src/compiler/nir/nir_opt_access.c index 14e49ee..3d4fce2 100644 --- a/src/compiler/nir/nir_opt_access.c +++ b/src/compiler/nir/nir_opt_access.c @@ -97,6 +97,7 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr) case nir_intrinsic_image_deref_atomic_fadd: case nir_intrinsic_image_deref_atomic_fmin: case nir_intrinsic_image_deref_atomic_fmax: + case nir_intrinsic_image_deref_samples_identical: var = nir_intrinsic_get_var(instr, 0); read = instr->intrinsic != nir_intrinsic_image_deref_store; write = instr->intrinsic != nir_intrinsic_image_deref_load && @@ -139,6 +140,7 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr) case nir_intrinsic_bindless_image_atomic_fadd: case nir_intrinsic_bindless_image_atomic_fmin: case nir_intrinsic_bindless_image_atomic_fmax: + case nir_intrinsic_bindless_image_samples_identical: read = instr->intrinsic != nir_intrinsic_bindless_image_store; write = instr->intrinsic != nir_intrinsic_bindless_image_load && instr->intrinsic != nir_intrinsic_bindless_image_sparse_load; diff --git a/src/compiler/nir/nir_opt_preamble.c b/src/compiler/nir/nir_opt_preamble.c index 0a9224d..174021e 100644 --- a/src/compiler/nir/nir_opt_preamble.c +++ b/src/compiler/nir/nir_opt_preamble.c @@ -197,6 +197,7 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx) * sources can be moved. */ case nir_intrinsic_image_load: + case nir_intrinsic_image_samples_identical: case nir_intrinsic_bindless_image_load: case nir_intrinsic_load_ssbo: case nir_intrinsic_load_ssbo_ir3: -- 2.7.4