radv/ray_queries: Skip cull_mask handling if it is FF
author: Konstantin Seurer <konstantin.seurer@gmail.com>
Sat, 16 Sep 2023 17:35:21 +0000 (19:35 +0200)
committer: Marge Bot <emma+marge@anholt.net>
Wed, 20 Sep 2023 13:00:03 +0000 (13:00 +0000)
Stats for Metro Exodus:

Totals from 26 (0.99% of 2627) affected shaders:
Instrs: 14586 -> 14232 (-2.43%)
CodeSize: 77024 -> 75192 (-2.38%)
VGPRs: 1408 -> 1208 (-14.20%)
Latency: 315076 -> 309898 (-1.64%)
InvThroughput: 42345 -> 41677 (-1.58%)
VClause: 366 -> 374 (+2.19%)
Copies: 2840 -> 2800 (-1.41%); split: -1.48%, +0.07%
Branches: 587 -> 561 (-4.43%)
PreSGPRs: 897 -> 853 (-4.91%)
PreVGPRs: 1290 -> 1122 (-13.02%)

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25268>

src/amd/vulkan/nir/radv_nir_lower_ray_queries.c
src/amd/vulkan/radv_rt_common.c
src/amd/vulkan/radv_rt_common.h

index 1e7a41b..a66277d 100644 (file)
@@ -172,6 +172,8 @@ struct ray_query_vars {
    rq_variable *stack;
    uint32_t shared_base;
    uint32_t stack_entries;
+
+   nir_intrinsic_instr *initialize;
 };
 
 #define VAR_NAME(name) strcat(strcpy(ralloc_size(ctx, strlen(base_name) + strlen(name) + 1), base_name), name)
@@ -387,6 +389,8 @@ lower_rq_initialize(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr,
    rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, -1), 1);
 
    rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, !(instance->debug_flags & RADV_DEBUG_NO_RT)), 0x1);
+
+   vars->initialize = instr;
 }
 
 static nir_def *
@@ -555,8 +559,18 @@ load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal
 }
 
 static nir_def *
-lower_rq_proceed(nir_builder *b, nir_def *index, struct ray_query_vars *vars, struct radv_device *device)
+lower_rq_proceed(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars,
+                 struct radv_device *device)
 {
+   nir_metadata_require(nir_cf_node_get_function(&instr->instr.block->cf_node), nir_metadata_dominance);
+
+   bool ignore_cull_mask = false;
+   if (nir_block_dominates(vars->initialize->instr.block, instr->instr.block)) {
+      nir_src cull_mask = vars->initialize->src[3];
+      if (nir_src_is_const(cull_mask) && nir_src_as_uint(cull_mask) == 0xFF)
+         ignore_cull_mask = true;
+   }
+
    nir_variable *inv_dir = nir_local_variable_create(b->impl, glsl_vector_type(GLSL_TYPE_FLOAT, 3), "inv_dir");
    nir_store_var(b, inv_dir, nir_frcp(b, rq_load_var(b, index, vars->trav.direction)), 0x7);
 
@@ -591,6 +605,7 @@ lower_rq_proceed(nir_builder *b, nir_def *index, struct ray_query_vars *vars, st
       .dir = rq_load_var(b, index, vars->direction),
       .vars = trav_vars,
       .stack_entries = vars->stack_entries,
+      .ignore_cull_mask = ignore_cull_mask,
       .stack_store_cb = store_stack_entry,
       .stack_load_cb = load_stack_entry,
       .aabb_cb = handle_candidate_aabb,
@@ -695,7 +710,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
                new_dest = lower_rq_load(&builder, index, intrinsic, vars);
                break;
             case nir_intrinsic_rq_proceed:
-               new_dest = lower_rq_proceed(&builder, index, vars, device);
+               new_dest = lower_rq_proceed(&builder, index, intrinsic, vars, device);
                break;
             case nir_intrinsic_rq_terminate:
                lower_rq_terminate(&builder, index, intrinsic, vars);
index 781df7a..5aab4dc 100644 (file)
@@ -574,12 +574,14 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
 
                nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1);
 
-               nir_def *instance_and_mask = nir_channel(b, instance_data, 2);
-               nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24)));
-               {
-                  nir_jump(b, nir_jump_continue);
+               if (!args->ignore_cull_mask) {
+                  nir_def *instance_and_mask = nir_channel(b, instance_data, 2);
+                  nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24)));
+                  {
+                     nir_jump(b, nir_jump_continue);
+                  }
+                  nir_pop_if(b, NULL);
                }
-               nir_pop_if(b, NULL);
 
                nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1);
                nir_store_deref(b, args->vars.bvh_base, nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)), 1);
index 949a301..9b098c2 100644 (file)
@@ -144,6 +144,8 @@ struct radv_ray_traversal_args {
    uint32_t stack_entries;
    uint32_t stack_base;
 
+   bool ignore_cull_mask;
+
    radv_rt_stack_store_cb stack_store_cb;
    radv_rt_stack_load_cb stack_load_cb;