From: Bas Nieuwenhuizen Date: Tue, 20 Sep 2022 22:55:35 +0000 (+0200) Subject: radv: Hardcode root node id. X-Git-Tag: upstream/22.3.5~2452 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ffc5f52724893ef33a63e7f8f6c335f0587b7790;p=platform%2Fupstream%2Fmesa.git radv: Hardcode root node id. Optimizes code a tiny bit, and avoids the hack of encoding the root node id in the low bits of the BLAS address in an instance node. This is needed to adjust serialization/deserialization as the instance address there has to be the base address, so this avoids some wrapping/unwrapping. Reviewed-by: Konstantin Seurer Part-of: --- diff --git a/src/amd/vulkan/bvh/bvh.h b/src/amd/vulkan/bvh/bvh.h index dec87c1..f7d2121 100644 --- a/src/amd/vulkan/bvh/bvh.h +++ b/src/amd/vulkan/bvh/bvh.h @@ -53,8 +53,8 @@ struct radv_accel_struct_geometry_info { }; struct radv_accel_struct_header { - uint32_t root_node_offset; uint32_t reserved; + uint32_t reserved2; float aabb[2][3]; /* Everything after this gets updated/copied from the CPU. 
*/ @@ -119,4 +119,7 @@ struct radv_bvh_box32_node { uint32_t reserved[4]; }; +/* 128 bytes of header & a box32 node */ +#define RADV_BVH_ROOT_NODE (0x10 + radv_bvh_node_internal) + #endif diff --git a/src/amd/vulkan/bvh/internal.comp b/src/amd/vulkan/bvh/internal.comp index 9b26530..de4de86 100644 --- a/src/amd/vulkan/bvh/internal.comp +++ b/src/amd/vulkan/bvh/internal.comp @@ -95,7 +95,6 @@ main(void) if (fill_header) { REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.bvh); - DEREF(header).root_node_offset = node_id; DEREF(header).aabb[0][0] = total_bounds.min.x; DEREF(header).aabb[0][1] = total_bounds.min.y; diff --git a/src/amd/vulkan/bvh/leaf.comp b/src/amd/vulkan/bvh/leaf.comp index 4e98691..a0e86f6 100644 --- a/src/amd/vulkan/bvh/leaf.comp +++ b/src/amd/vulkan/bvh/leaf.comp @@ -211,8 +211,7 @@ build_instance(inout AABB bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t g radv_accel_struct_header instance_header = DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference)); - DEREF(node).base_ptr = - instance.accelerationStructureReference | instance_header.root_node_offset; + DEREF(node).base_ptr = instance.accelerationStructureReference; for (uint32_t comp = 0; comp < 3; ++comp) { bounds.min[comp] = transform[3][comp]; diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c index d6f04ca..71b0cbc 100644 --- a/src/amd/vulkan/radv_acceleration_structure.c +++ b/src/amd/vulkan/radv_acceleration_structure.c @@ -843,9 +843,15 @@ radv_CmdBuildAccelerationStructuresKHR( (src_scratch_offset == buffer_1_offset) ? buffer_2_offset : buffer_1_offset; uint32_t dst_node_offset = bvh_states[i].node_offset; - if (final_iter) + if (final_iter) { dst_node_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64); + /* Make sure we build the BVH so the hardcoded root node is valid. 
*/ + STATIC_ASSERT(RADV_BVH_ROOT_NODE == + DIV_ROUND_UP(sizeof(struct radv_accel_struct_header), 64) * 8 + + radv_bvh_node_internal); + } + const struct internal_args consts = { .bvh = accel_struct->va, .src_ids = pInfos[i].scratchData.deviceAddress + src_scratch_offset, diff --git a/src/amd/vulkan/radv_nir_lower_ray_queries.c b/src/amd/vulkan/radv_nir_lower_ray_queries.c index b25f0d8..7c78726 100644 --- a/src/amd/vulkan/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/radv_nir_lower_ray_queries.c @@ -366,12 +366,9 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins { rq_store_var(b, index, vars->trav.bvh_base, build_addr_to_node(b, accel_struct), 1); - nir_ssa_def *bvh_root = - nir_build_load_global(b, 1, 32, accel_struct, .access = ACCESS_NON_WRITEABLE, - .align_mul = 64, .align_offset = 0); - rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 1), 0x1); - rq_store_array(b, index, vars->stack, nir_imm_int(b, 0), bvh_root, 0x1); + rq_store_array(b, index, vars->stack, nir_imm_int(b, 0), nir_imm_int(b, RADV_BVH_ROOT_NODE), + 0x1); rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, 0), 1); @@ -749,7 +746,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars 1); rq_store_array(b, index, vars->stack, rq_load_var(b, index, vars->trav.stack), - nir_iand_imm(b, nir_channel(b, instance_data, 0), 63), 0x1); + nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1); rq_store_var(b, index, vars->trav.stack, nir_iadd_imm(b, rq_load_var(b, index, vars->trav.stack), 1), 1); diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index 0f9a8db..374ce4a 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -1422,9 +1422,6 @@ build_traversal_shader(struct radv_device *device, { nir_store_var(&b, trav_vars.bvh_base, build_addr_to_node(&b, accel_struct), 1); - nir_ssa_def *bvh_root = nir_build_load_global( - &b, 1, 32, accel_struct, .access = 
ACCESS_NON_WRITEABLE, .align_mul = 64); - nir_ssa_def *desc = create_bvh_descriptor(&b); nir_ssa_def *vec3ones = nir_channels(&b, nir_imm_vec4(&b, 1.0, 1.0, 1.0, 1.0), 0x7); @@ -1438,7 +1435,7 @@ build_traversal_shader(struct radv_device *device, nir_store_var(&b, trav_vars.lds_stack_base, stack_base, 1); nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, 0), 1); - nir_store_var(&b, trav_vars.current_node, bvh_root, 0x1); + nir_store_var(&b, trav_vars.current_node, nir_imm_int(&b, RADV_BVH_ROOT_NODE), 0x1); nir_push_loop(&b); @@ -1567,8 +1564,7 @@ build_traversal_shader(struct radv_device *device, build_addr_to_node( &b, nir_pack_64_2x32(&b, nir_channels(&b, instance_data, 0x3))), 1); - nir_store_var(&b, trav_vars.current_node, - nir_iand_imm(&b, nir_channel(&b, instance_data, 0), 63), 1); + nir_store_var(&b, trav_vars.current_node, nir_imm_int(&b, RADV_BVH_ROOT_NODE), 1); nir_store_var( &b, trav_vars.origin, diff --git a/src/amd/vulkan/radv_rra.c b/src/amd/vulkan/radv_rra.c index 8eb1988..3c5ec15 100644 --- a/src/amd/vulkan/radv_rra.c +++ b/src/amd/vulkan/radv_rra.c @@ -373,7 +373,7 @@ rra_dump_blas_header(struct radv_accel_struct_header *header, } static void -rra_dump_blas_geometry_infos(struct radv_accel_struct_geometry_info *geometry_infos, +rra_dump_blas_geometry_infos(struct radv_accel_struct_geometry_info *geometry_infos, uint32_t geometry_count, FILE *output) { uint32_t accumulated_primitive_count = 0; @@ -465,10 +465,9 @@ rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, leaf_nodes_size, internal_nodes_size, parent_table_size, is_bottom_level); } else if (type == radv_bvh_node_instance) { struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset); - uint64_t blas_va = src->base_ptr & (~63UL); - if (!_mesa_hash_table_u64_search(accel_struct_vas, blas_va)) { + if (!_mesa_hash_table_u64_search(accel_struct_vas, src->base_ptr)) { rra_accel_struct_validation_fail(offset, "Invalid instance node pointer 
0x%llx", - (unsigned long long)blas_va); + (unsigned long long)src->base_ptr); result = false; } } @@ -503,14 +502,11 @@ rra_transcode_aabb_node(struct rra_aabb_node *dst, const struct radv_bvh_aabb_no static void rra_transcode_instance_node(struct rra_instance_node *dst, const struct radv_bvh_instance_node *src) { - /* Mask out root node offset from AS pointer to get the raw VA */ - uint64_t blas_va = src->base_ptr & (~63UL); - dst->custom_instance_id = src->custom_instance_and_mask & 0xffffff; dst->mask = src->custom_instance_and_mask >> 24; dst->sbt_offset = src->sbt_offset_and_flags & 0xffffff; dst->instance_flags = src->sbt_offset_and_flags >> 24; - dst->blas_va = (blas_va + sizeof(struct rra_accel_struct_metadata)) >> 3; + dst->blas_va = (src->base_ptr + sizeof(struct rra_accel_struct_metadata)) >> 3; dst->instance_id = src->instance_id; dst->blas_metadata_size = sizeof(struct rra_accel_struct_metadata); @@ -640,7 +636,7 @@ rra_dump_acceleration_structure(struct rra_copied_accel_struct *copied_struct, ((dst_leaf_nodes_size + dst_internal_nodes_size) / 64) * sizeof(uint32_t); /* convert root node id to offset */ - uint32_t src_root_offset = (header->root_node_offset & ~7) << 3; + uint32_t src_root_offset = (RADV_BVH_ROOT_NODE & ~7) << 3; if (should_validate) if (!rra_validate_node(accel_struct_vas, data, @@ -656,7 +652,7 @@ rra_dump_acceleration_structure(struct rra_copied_accel_struct *copied_struct, } node_parent_table[rra_parent_table_index_from_offset(RRA_ROOT_NODE_OFFSET, node_parent_table_size)] = 0xffffffff; - + uint32_t *leaf_node_ids = malloc(primitive_count * sizeof(uint32_t)); if (!leaf_node_ids) { free(node_parent_table); @@ -1005,7 +1001,7 @@ rra_copy_acceleration_structures(VkQueue vk_queue, struct rra_accel_struct_copy dst->copied_structures[i].handle = structure; dst->copied_structures[i].data = dst->map_data + dst_offset; - + dst_offset += accel_struct->size; ++(*copied_structure_count);