nir: rename ACCESS_STREAM_CACHE_POLICY -> ACCESS_NON_TEMPORAL and document
author Marek Olšák <marek.olsak@amd.com>
Fri, 28 Apr 2023 03:02:28 +0000 (23:02 -0400)
committer Marge Bot <emma+marge@anholt.net>
Thu, 4 May 2023 01:55:22 +0000 (01:55 +0000)
Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22769>

12 files changed:
src/amd/common/ac_nir.c
src/amd/common/ac_nir_lower_esgs_io_to_mem.c
src/amd/common/ac_nir_lower_ngg.c
src/amd/compiler/aco_instruction_selection.cpp
src/amd/llvm/ac_nir_to_llvm.c
src/compiler/nir/nir_print.c
src/compiler/shader_enums.h
src/compiler/spirv/spirv_to_nir.c
src/compiler/spirv/vtn_variables.c
src/gallium/auxiliary/nir/tgsi_to_nir.c
src/gallium/drivers/radeonsi/si_shaderlib_nir.c
src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c

index b4afb89..a2201a0 100644 (file)
@@ -524,7 +524,7 @@ emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info,
       nir_ssa_def *zero = nir_imm_int(b, 0);
       nir_store_buffer_amd(b, data, so_buffers[buffer], so_write_offset[buffer], zero, zero,
                            .base = output->offset, .write_mask = mask,
-                           .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY);
+                           .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
    }
 
    nir_pop_if(b, NULL);
@@ -581,7 +581,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
             outputs.data[i][j] =
                nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
                                    .base = offset,
-                                   .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY);
+                                   .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
 
             /* clamp legacy color output */
             if (i == VARYING_SLOT_COL0 || i == VARYING_SLOT_COL1 ||
@@ -607,7 +607,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
             nir_ssa_def *data =
                nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
                                    .base = offset,
-                                   .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY);
+                                   .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
 
             if (has_lo_16bit)
                outputs.data_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, data);
@@ -944,7 +944,7 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
          nir_ssa_def *data = nir_u2uN(b, output, 32);
 
          nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
-                              .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY |
+                              .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
                                         ACCESS_IS_SWIZZLED_AMD,
                               .base = base,
                               /* For ACO to not reorder this store around EmitVertex/EndPrimitve */
@@ -988,7 +988,7 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
 
          nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi),
                               gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
-                              .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY |
+                              .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
                                         ACCESS_IS_SWIZZLED_AMD,
                               /* For ACO to not reorder this store around EmitVertex/EndPrimitve */
                               .memory_modes = nir_var_shader_out);
index bb490d5..53471f3 100644 (file)
@@ -113,7 +113,7 @@ emit_split_buffer_store(nir_builder *b, nir_ssa_def *d, nir_ssa_def *desc, nir_s
          nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero,
                               .base = start_byte, .memory_modes = nir_var_shader_out,
                               .access = ACCESS_COHERENT |
-                                        (slc ? ACCESS_STREAM_CACHE_POLICY : 0) |
+                                        (slc ? ACCESS_NON_TEMPORAL : 0) |
                                         (swizzled ? ACCESS_IS_SWIZZLED_AMD : 0));
 
          start_byte += store_bytes;
index 9ae0983..0414f4d 100644 (file)
@@ -2017,7 +2017,7 @@ ngg_build_streamout_vertex(nir_builder *b, nir_xfb_info *info,
                            vtx_buffer_offsets[out->buffer],
                            zero, zero,
                            .base = out->offset,
-                           .access = ACCESS_STREAM_CACHE_POLICY);
+                           .access = ACCESS_NON_TEMPORAL);
    }
 }
 
index c9b78c5..7bb3a24 100644 (file)
@@ -7092,7 +7092,7 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
    Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[3].ssa)) : Temp();
 
    bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
-   bool slc = nir_intrinsic_access(intrin) & ACCESS_STREAM_CACHE_POLICY;
+   bool slc = nir_intrinsic_access(intrin) & ACCESS_NON_TEMPORAL;
 
    unsigned const_offset = nir_intrinsic_base(intrin);
    unsigned elem_size_bytes = intrin->dest.ssa.bit_size / 8u;
@@ -7164,7 +7164,7 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
    Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[4].ssa)) : Temp();
 
    bool glc = nir_intrinsic_access(intrin) & ACCESS_COHERENT;
-   bool slc = nir_intrinsic_access(intrin) & ACCESS_STREAM_CACHE_POLICY;
+   bool slc = nir_intrinsic_access(intrin) & ACCESS_NON_TEMPORAL;
 
    unsigned const_offset = nir_intrinsic_base(intrin);
    unsigned write_mask = nir_intrinsic_write_mask(intrin);
index 3a3c7d5..000033b 100644 (file)
@@ -1822,7 +1822,7 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qual
       cache_policy |= ac_glc;
    }
 
-   if (access & ACCESS_STREAM_CACHE_POLICY)
+   if (access & ACCESS_NON_TEMPORAL)
       cache_policy |= ac_slc | ac_glc;
 
    return cache_policy;
@@ -4005,7 +4005,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
       bool swizzled = nir_intrinsic_access(instr) & ACCESS_IS_SWIZZLED_AMD;
       bool reorder = nir_intrinsic_can_reorder(instr);
       bool coherent = nir_intrinsic_access(instr) & ACCESS_COHERENT;
-      bool slc = nir_intrinsic_access(instr) & ACCESS_STREAM_CACHE_POLICY;
+      bool slc = nir_intrinsic_access(instr) & ACCESS_NON_TEMPORAL;
       bool uses_format = nir_intrinsic_access(instr) & ACCESS_USES_FORMAT_AMD;
 
       enum ac_image_cache_policy cache_policy = 0;
index edab512..3c00171 100644 (file)
@@ -650,12 +650,12 @@ print_var_decl(nir_variable *var, print_state *state)
    const char *const ronly = (access & ACCESS_NON_WRITEABLE) ? "readonly " : "";
    const char *const wonly = (access & ACCESS_NON_READABLE) ? "writeonly " : "";
    const char *const reorder = (access & ACCESS_CAN_REORDER) ? "reorderable " : "";
-   const char *const stream_cache_policy = (access & ACCESS_STREAM_CACHE_POLICY) ?
-                                           "stream-cache-policy " : "";
+   const char *const non_temporal = (access & ACCESS_NON_TEMPORAL) ?
+                                       "non-temporal " : "";
    const char *const include_helpers = (access & ACCESS_INCLUDE_HELPERS) ?
                                        "include-helpers " : "";
    fprintf(fp, "%s%s%s%s%s%s%s%s", coher, volat, restr, ronly, wonly, reorder,
-           stream_cache_policy, include_helpers);
+           non_temporal, include_helpers);
 
    if (glsl_get_base_type(glsl_without_array(var->type)) == GLSL_TYPE_IMAGE) {
       fprintf(fp, "%s ", util_format_short_name(var->data.image.format));
index 2dfca1d..d86df6c 100644 (file)
@@ -1026,8 +1026,11 @@ enum gl_access_qualifier
     */
    ACCESS_CAN_REORDER = (1 << 6),
 
-   /** Use as little cache space as possible. */
-   ACCESS_STREAM_CACHE_POLICY = (1 << 7),
+   /**
+    * Hints that the accessed address is not likely to be accessed again
+    * in the near future. This reduces data retention in caches.
+    */
+   ACCESS_NON_TEMPORAL = (1 << 7),
 
    /** Execute instruction also in helpers. */
    ACCESS_INCLUDE_HELPERS = (1 << 8),
index 5997e86..3dbb77f 100644 (file)
@@ -3113,7 +3113,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
    vtn_foreach_decoration(b, sampled_val, non_uniform_decoration_cb, &access);
 
    if (operands & SpvImageOperandsNontemporalMask)
-      access |= ACCESS_STREAM_CACHE_POLICY;
+      access |= ACCESS_NON_TEMPORAL;
 
    if (sampler && b->options->force_tex_non_uniform)
       access |= ACCESS_NON_UNIFORM;
@@ -3370,7 +3370,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
       if (operands & SpvImageOperandsVolatileTexelMask)
          access |= ACCESS_VOLATILE;
       if (operands & SpvImageOperandsNontemporalMask)
-         access |= ACCESS_STREAM_CACHE_POLICY;
+         access |= ACCESS_NON_TEMPORAL;
 
       break;
    }
@@ -3412,7 +3412,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
       if (operands & SpvImageOperandsVolatileTexelMask)
          access |= ACCESS_VOLATILE;
       if (operands & SpvImageOperandsNontemporalMask)
-         access |= ACCESS_STREAM_CACHE_POLICY;
+         access |= ACCESS_NON_TEMPORAL;
 
       break;
    }
index 8db61c8..2a7ebcf 100644 (file)
@@ -2345,7 +2345,7 @@ spv_access_to_gl_access(SpvMemoryAccessMask access)
    if (access & SpvMemoryAccessVolatileMask)
       result |= ACCESS_VOLATILE;
    if (access & SpvMemoryAccessNontemporalMask)
-      result |= ACCESS_STREAM_CACHE_POLICY;
+      result |= ACCESS_NON_TEMPORAL;
 
    return result;
 }
index 579a62b..a8f5cff 100644 (file)
@@ -1673,7 +1673,7 @@ get_mem_qualifier(struct tgsi_full_instruction *tgsi_inst)
    if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
       access |= ACCESS_VOLATILE;
    if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
-      access |= ACCESS_STREAM_CACHE_POLICY;
+      access |= ACCESS_NON_TEMPORAL;
 
    return access;
 }
index a4da273..f0ef2b0 100644 (file)
@@ -268,7 +268,7 @@ void *si_create_clear_buffer_rmw_cs(struct si_context *sctx)
    data = nir_ior(&b, data, nir_channel(&b, user_sgprs, 0));
 
    nir_store_ssbo(&b, data, zero, address,
-      .access = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ACCESS_STREAM_CACHE_POLICY : 0,
+      .access = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ACCESS_NON_TEMPORAL : 0,
       .align_mul = 4);
 
    return create_shader_state(sctx, b.shader);
index 32f0934..b162643 100644 (file)
@@ -202,7 +202,7 @@ emit_access_decorations(struct ntv_context *ctx, nir_variable *var, SpvId var_id
           spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationNonUniform);
           break;
        case ACCESS_CAN_REORDER:
-       case ACCESS_STREAM_CACHE_POLICY:
+       case ACCESS_NON_TEMPORAL:
           /* no equivalent */
           break;
        default: