From 38842109020515d3fd9c06d4918956a63a8c65ad Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sat, 12 Nov 2022 02:13:17 +0100 Subject: [PATCH] radv: Skip the AND in node_to_addr for bvh_base. This is safe because the bvh base is always 64-byte aligned. Totals from 7 (0.01% of 134913) affected shaders: CodeSize: 209216 -> 209076 (-0.07%) Instrs: 38402 -> 38374 (-0.07%) Latency: 804537 -> 803899 (-0.08%) InvThroughput: 165663 -> 165530 (-0.08%) Copies: 4919 -> 4912 (-0.14%) Part-of: --- src/amd/vulkan/radv_rt_common.c | 23 ++++++++++++++--------- src/amd/vulkan/radv_rt_common.h | 2 -- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c index 58d2ea3..c0fd6d1 100644 --- a/src/amd/vulkan/radv_rt_common.c +++ b/src/amd/vulkan/radv_rt_common.c @@ -25,6 +25,9 @@ #include "radv_rt_common.h" #include "radv_acceleration_structure.h" +static nir_ssa_def *build_node_to_addr(struct radv_device *device, nir_builder *b, + nir_ssa_def *node, bool skip_type_and); + bool radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines) { @@ -78,7 +81,7 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4); const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4); - nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node); + nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node, false); /* vec4 distances = vec4(INF, INF, INF, INF); */ nir_variable *distances = @@ -179,7 +182,7 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s { const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4); - nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node); + nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node, false); const uint32_t coord_offsets[3] = { offsetof(struct radv_bvh_triangle_node, coords[0]), @@ -345,10 
+348,11 @@ build_addr_to_node(nir_builder *b, nir_ssa_def *addr) return nir_iand_imm(b, node, (bvh_size - 1) << 3); } -nir_ssa_def * -build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node) +static nir_ssa_def * +build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node, + bool skip_type_and) { - nir_ssa_def *addr = nir_iand_imm(b, node, ~7ull); + nir_ssa_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull); addr = nir_ishl_imm(b, addr, 3); /* Assumes everything is in the top half of address space, which is true in * GFX9+ for now. */ @@ -455,7 +459,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, nir_flt(b, args->tmin, intersection.t), not_cull)); { - intersection.base.node_addr = build_node_to_addr(device, b, bvh_node); + intersection.base.node_addr = build_node_to_addr(device, b, bvh_node, false); nir_ssa_def *triangle_info = nir_build_load_global( b, 2, 32, nir_iadd_imm(b, intersection.base.node_addr, @@ -492,7 +496,7 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, return; struct radv_leaf_intersection intersection; - intersection.node_addr = build_node_to_addr(device, b, bvh_node); + intersection.node_addr = build_node_to_addr(device, b, bvh_node, false); nir_ssa_def *triangle_info = nir_build_load_global(b, 2, 32, nir_iadd_imm(b, intersection.node_addr, 24)); intersection.primitive_id = nir_channel(b, triangle_info, 0); @@ -590,7 +594,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, { nir_ssa_def *prev = nir_load_deref(b, args->vars.previous_node); nir_ssa_def *bvh_addr = - build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base)); + build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true); nir_ssa_def *parent = fetch_parent_node(b, bvh_addr, prev); nir_push_if(b, nir_ieq(b, parent, nir_imm_int(b, RADV_BVH_INVALID_NODE))); @@ -653,7 +657,8 @@ radv_build_ray_traversal(struct radv_device 
*device, nir_builder *b, nir_push_else(b, NULL); { /* instance */ - nir_ssa_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node); + nir_ssa_def *instance_node_addr = + build_node_to_addr(device, b, global_bvh_node, false); nir_ssa_def *instance_data = nir_build_load_global( b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0); nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2); diff --git a/src/amd/vulkan/radv_rt_common.h b/src/amd/vulkan/radv_rt_common.h index f5d635f..cb38534 100644 --- a/src/amd/vulkan/radv_rt_common.h +++ b/src/amd/vulkan/radv_rt_common.h @@ -47,8 +47,6 @@ nir_ssa_def *intersect_ray_amd_software_tri(struct radv_device *device, nir_buil nir_ssa_def *build_addr_to_node(nir_builder *b, nir_ssa_def *addr); -nir_ssa_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node); - nir_ssa_def *nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation); -- 2.7.4