nir: add nir_intrinsic_image_samples_identical
authorMarek Olšák <marek.olsak@amd.com>
Tue, 19 Jul 2022 05:23:44 +0000 (01:23 -0400)
committerMarge Bot <emma+marge@anholt.net>
Wed, 3 Aug 2022 17:44:15 +0000 (17:44 +0000)
radeonsi will use it

Reviewed-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17693>

src/amd/llvm/ac_nir_to_llvm.c
src/compiler/glsl/gl_nir_lower_images.c
src/compiler/glsl/gl_nir_lower_samplers_as_deref.c
src/compiler/nir/nir_divergence_analysis.c
src/compiler/nir/nir_group_loads.c
src/compiler/nir/nir_intrinsics.py
src/compiler/nir/nir_lower_non_uniform_access.c
src/compiler/nir/nir_opt_access.c
src/compiler/nir/nir_opt_preamble.c

index b7a1292..47ee87c 100644 (file)
@@ -3844,6 +3844,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
       break;
    case nir_intrinsic_image_deref_load:
    case nir_intrinsic_image_deref_sparse_load:
+   case nir_intrinsic_image_deref_samples_identical:
       result = visit_image_load(ctx, instr, false);
       break;
    case nir_intrinsic_bindless_image_store:
index fde9960..09b0094 100644 (file)
@@ -78,6 +78,7 @@ lower_impl(nir_builder *b, nir_instr *instr, bool bindless_only)
    case nir_intrinsic_image_deref_load:
    case nir_intrinsic_image_deref_samples:
    case nir_intrinsic_image_deref_size:
+   case nir_intrinsic_image_deref_samples_identical:
    case nir_intrinsic_image_deref_store: {
       deref = nir_src_as_deref(intrinsic->src[0]);
       var = nir_deref_instr_get_variable(deref);
index a6f72d7..d4a8087 100644 (file)
@@ -328,6 +328,7 @@ lower_intrinsic(nir_intrinsic_instr *instr,
        instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
        instr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
        instr->intrinsic == nir_intrinsic_image_deref_size ||
+       instr->intrinsic == nir_intrinsic_image_deref_samples_identical ||
        instr->intrinsic == nir_intrinsic_image_deref_samples) {
 
       b->cursor = nir_before_instr(&instr->instr);
index 250c217..072a83e 100644 (file)
@@ -323,6 +323,13 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
       is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
       break;
 
+   case nir_intrinsic_image_samples_identical:
+   case nir_intrinsic_image_deref_samples_identical:
+   case nir_intrinsic_bindless_image_samples_identical:
+      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
+                     instr->src[1].ssa->divergent;
+      break;
+
    case nir_intrinsic_image_load:
    case nir_intrinsic_image_deref_load:
    case nir_intrinsic_bindless_image_load:
index b9da5c3..b45f9f3 100644 (file)
@@ -83,6 +83,9 @@ get_intrinsic_resource(nir_intrinsic_instr *intr)
    case nir_intrinsic_image_sparse_load:
    case nir_intrinsic_image_deref_sparse_load:
    /* Group image_size too because it has the same latency as cache hits. */
+   case nir_intrinsic_image_samples_identical:
+   case nir_intrinsic_image_deref_samples_identical:
+   case nir_intrinsic_bindless_image_samples_identical:
    case nir_intrinsic_image_size:
    case nir_intrinsic_image_deref_size:
    case nir_intrinsic_bindless_image_load:
index 36b2136..6fb7dbd 100644 (file)
@@ -649,6 +649,8 @@ image("size",    dest_comp=0, src_comp=[1], flags=[CAN_ELIMINATE, CAN_REORDER])
 image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
 image("atomic_inc_wrap",  src_comp=[4, 1, 1], dest_comp=1)
 image("atomic_dec_wrap",  src_comp=[4, 1, 1], dest_comp=1)
+# This returns true if all samples within the pixel have equal color values.
+image("samples_identical", dest_comp=1, src_comp=[4], flags=[CAN_ELIMINATE])
 # CL-specific format queries
 image("format", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
 image("order", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
index fbdd372..02ad2a6 100644 (file)
@@ -272,6 +272,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
             case nir_intrinsic_image_atomic_fmax:
             case nir_intrinsic_image_size:
             case nir_intrinsic_image_samples:
+            case nir_intrinsic_image_samples_identical:
             case nir_intrinsic_bindless_image_load:
             case nir_intrinsic_bindless_image_sparse_load:
             case nir_intrinsic_bindless_image_store:
@@ -290,6 +291,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
             case nir_intrinsic_bindless_image_atomic_fmax:
             case nir_intrinsic_bindless_image_size:
             case nir_intrinsic_bindless_image_samples:
+            case nir_intrinsic_bindless_image_samples_identical:
             case nir_intrinsic_image_deref_load:
             case nir_intrinsic_image_deref_sparse_load:
             case nir_intrinsic_image_deref_store:
@@ -308,6 +310,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
             case nir_intrinsic_image_deref_atomic_fmax:
             case nir_intrinsic_image_deref_size:
             case nir_intrinsic_image_deref_samples:
+            case nir_intrinsic_image_deref_samples_identical:
                if ((options->types & nir_lower_non_uniform_image_access) &&
                    lower_non_uniform_access_intrin(options, &b, intrin, 0))
                   progress = true;
index 14e49ee..3d4fce2 100644 (file)
@@ -97,6 +97,7 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr)
    case nir_intrinsic_image_deref_atomic_fadd:
    case nir_intrinsic_image_deref_atomic_fmin:
    case nir_intrinsic_image_deref_atomic_fmax:
+   case nir_intrinsic_image_deref_samples_identical:
       var = nir_intrinsic_get_var(instr, 0);
       read = instr->intrinsic != nir_intrinsic_image_deref_store;
       write = instr->intrinsic != nir_intrinsic_image_deref_load &&
@@ -139,6 +140,7 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr)
    case nir_intrinsic_bindless_image_atomic_fadd:
    case nir_intrinsic_bindless_image_atomic_fmin:
    case nir_intrinsic_bindless_image_atomic_fmax:
+   case nir_intrinsic_bindless_image_samples_identical:
       read = instr->intrinsic != nir_intrinsic_bindless_image_store;
       write = instr->intrinsic != nir_intrinsic_bindless_image_load &&
               instr->intrinsic != nir_intrinsic_bindless_image_sparse_load;
index 0a9224d..174021e 100644 (file)
@@ -197,6 +197,7 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
     * sources can be moved.
     */
    case nir_intrinsic_image_load:
+   case nir_intrinsic_image_samples_identical:
    case nir_intrinsic_bindless_image_load:
    case nir_intrinsic_load_ssbo:
    case nir_intrinsic_load_ssbo_ir3: