From 12a4581a06047da72c62f8898806c3fd5fc0aa36 Mon Sep 17 00:00:00 2001
From: Konstantin Seurer
Date: Sat, 23 Jul 2022 23:23:55 +0200
Subject: [PATCH] radv/nir_lower_abi: Use instructions_pass
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

nir_shader_instructions_pass is sufficient and it should be a bit
faster than nir_shader_lower_instructions.

Signed-off-by: Konstantin Seurer
Reviewed-by: Timur Kristóf
Part-of:
---
 src/amd/vulkan/radv_nir_lower_abi.c | 214 +++++++++++++++++-------------------
 1 file changed, 100 insertions(+), 114 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_lower_abi.c b/src/amd/vulkan/radv_nir_lower_abi.c
index f23c70b..a7a2e3a 100644
--- a/src/amd/vulkan/radv_nir_lower_abi.c
+++ b/src/amd/vulkan/radv_nir_lower_abi.c
@@ -57,38 +57,56 @@ nggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
    return nir_test_mask(b, settings, mask);
 }
 
-static nir_ssa_def *
+static bool
 lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
 {
-   lower_abi_state *s = (lower_abi_state *) state;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+   lower_abi_state *s = (lower_abi_state *)state;
    gl_shader_stage stage = b->shader->info.stage;
+
+   b->cursor = nir_before_instr(instr);
+
+   nir_ssa_def *replacement = NULL;
+
    switch (intrin->intrinsic) {
    case nir_intrinsic_load_ring_tess_factors_amd:
-      return load_ring(b, RING_HS_TESS_FACTOR, s);
+      if (s->use_llvm)
+         break;
+      replacement = load_ring(b, RING_HS_TESS_FACTOR, s);
+      break;
    case nir_intrinsic_load_ring_tess_factors_offset_amd:
-      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_factor_offset);
-
+      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_factor_offset);
+      break;
    case nir_intrinsic_load_ring_tess_offchip_amd:
-      return load_ring(b, RING_HS_TESS_OFFCHIP, s);
+      if (s->use_llvm)
+         break;
+      replacement = load_ring(b, RING_HS_TESS_OFFCHIP, s);
+      break;
    case nir_intrinsic_load_ring_tess_offchip_offset_amd:
-      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.tess_offchip_offset);
-
+      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tess_offchip_offset);
+      break;
    case nir_intrinsic_load_tcs_num_patches_amd:
-      return nir_imm_int(b, s->info->num_tess_patches);
-
+      replacement = nir_imm_int(b, s->info->num_tess_patches);
+      break;
    case nir_intrinsic_load_ring_esgs_amd:
-      return load_ring(b, stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS, s);
+      if (s->use_llvm)
+         break;
+      replacement = load_ring(b, stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS, s);
+      break;
    case nir_intrinsic_load_ring_es2gs_offset_amd:
-      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.es2gs_offset);
-
+      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.es2gs_offset);
+      break;
    case nir_intrinsic_load_tess_rel_patch_id_amd:
       if (stage == MESA_SHADER_TESS_CTRL) {
-         return nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids), nir_imm_int(b, 0));
+         replacement = nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids),
+                                      nir_imm_int(b, 0));
       } else if (stage == MESA_SHADER_TESS_EVAL) {
          /* Setting an upper bound like this will actually make it possible
           * to optimize some multiplications (in address calculations) so that
@@ -97,52 +115,54 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
          nir_ssa_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id);
          nir_intrinsic_instr *load_arg = nir_instr_as_intrinsic(arg->parent_instr);
          nir_intrinsic_set_arg_upper_bound_u32_amd(load_arg, 2048 / MAX2(b->shader->info.tess.tcs_vertices_out, 1));
-         return arg;
+         replacement = arg;
       } else {
          unreachable("invalid tessellation shader stage");
       }
-
+      break;
    case nir_intrinsic_load_patch_vertices_in:
       if (stage == MESA_SHADER_TESS_CTRL)
-         return nir_imm_int(b, s->pl_key->tcs.tess_input_vertices);
+         replacement = nir_imm_int(b, s->pl_key->tcs.tess_input_vertices);
       else if (stage == MESA_SHADER_TESS_EVAL)
-         return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
+         replacement = nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
       else
         unreachable("invalid tessellation shader stage");
-
+      break;
    case nir_intrinsic_load_gs_vertex_offset_amd:
-      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]);
-
+      replacement =
+         ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]);
+      break;
    case nir_intrinsic_load_workgroup_num_input_vertices_amd:
-      return nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
-                      nir_imm_int(b, 12), nir_imm_int(b, 9));
-
+      replacement = nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
+                             nir_imm_int(b, 12), nir_imm_int(b, 9));
+      break;
    case nir_intrinsic_load_workgroup_num_input_primitives_amd:
-      return nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
-                      nir_imm_int(b, 22), nir_imm_int(b, 9));
-
+      replacement = nir_ubfe(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info),
+                             nir_imm_int(b, 22), nir_imm_int(b, 9));
+      break;
    case nir_intrinsic_load_packed_passthrough_primitive_amd:
      /* NGG passthrough mode: the HW already packs the primitive export value to a single register. */
-      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[0]);
-
+      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[0]);
+      break;
    case nir_intrinsic_load_shader_query_enabled_amd:
-      return nir_ieq_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ngg_query_state), 1);
-
+      replacement = nir_ieq_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ngg_query_state), 1);
+      break;
    case nir_intrinsic_load_cull_any_enabled_amd:
-      return nggc_bool_setting(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives, s);
-
+      replacement = nggc_bool_setting(
+         b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives, s);
+      break;
    case nir_intrinsic_load_cull_front_face_enabled_amd:
-      return nggc_bool_setting(b, radv_nggc_front_face, s);
-
+      replacement = nggc_bool_setting(b, radv_nggc_front_face, s);
+      break;
    case nir_intrinsic_load_cull_back_face_enabled_amd:
-      return nggc_bool_setting(b, radv_nggc_back_face, s);
-
+      replacement = nggc_bool_setting(b, radv_nggc_back_face, s);
+      break;
    case nir_intrinsic_load_cull_ccw_amd:
-      return nggc_bool_setting(b, radv_nggc_face_is_ccw, s);
-
+      replacement = nggc_bool_setting(b, radv_nggc_face_is_ccw, s);
+      break;
    case nir_intrinsic_load_cull_small_primitives_enabled_amd:
-      return nggc_bool_setting(b, radv_nggc_small_primitives, s);
-
+      replacement = nggc_bool_setting(b, radv_nggc_small_primitives, s);
+      break;
    case nir_intrinsic_load_cull_small_prim_precision_amd: {
       /* To save space, only the exponent is stored in the high 8 bits.
        * We calculate the precision from those 8 bits:
@@ -151,108 +171,73 @@
        */
       nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
       nir_ssa_def *exponent = nir_ishr_imm(b, settings, 24u);
-      return nir_ldexp(b, nir_imm_float(b, 1.0f), exponent);
+      replacement = nir_ldexp(b, nir_imm_float(b, 1.0f), exponent);
+      break;
    }
-
    case nir_intrinsic_load_viewport_x_scale:
-      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[0]);
-
+      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[0]);
+      break;
    case nir_intrinsic_load_viewport_x_offset:
-      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[0]);
-
+      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[0]);
+      break;
    case nir_intrinsic_load_viewport_y_scale:
-      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[1]);
-
+      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[1]);
+      break;
    case nir_intrinsic_load_viewport_y_offset:
-      return ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[1]);
-
+      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[1]);
+      break;
    case nir_intrinsic_load_ring_task_draw_amd:
-      return load_ring(b, RING_TS_DRAW, s);
-
+      replacement = load_ring(b, RING_TS_DRAW, s);
+      break;
    case nir_intrinsic_load_ring_task_payload_amd:
-      return load_ring(b, RING_TS_PAYLOAD, s);
-
+      replacement = load_ring(b, RING_TS_PAYLOAD, s);
+      break;
    case nir_intrinsic_load_ring_mesh_scratch_amd:
-      return load_ring(b, RING_MS_SCRATCH, s);
-
+      replacement = load_ring(b, RING_MS_SCRATCH, s);
+      break;
    case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
      /* gs_tg_info[0:11] is ordered_wave_id. Multiply by the ring entry size. */
-      return nir_imul_imm(b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff),
-                          RADV_MESH_SCRATCH_ENTRY_BYTES);
-
+      replacement = nir_imul_imm(
+         b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff),
+         RADV_MESH_SCRATCH_ENTRY_BYTES);
+      break;
    case nir_intrinsic_load_task_ring_entry_amd:
-      return ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);
-
+      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);
+      break;
    case nir_intrinsic_load_task_ib_addr:
-      return ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_addr);
-
+      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_addr);
+      break;
    case nir_intrinsic_load_task_ib_stride:
-      return ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_stride);
-
+      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_stride);
+      break;
    case nir_intrinsic_load_lshs_vertex_stride_amd: {
       unsigned io_num = stage == MESA_SHADER_VERTEX ? s->info->vs.num_linked_outputs
                                                     : s->info->tcs.num_linked_inputs;
-      return nir_imm_int(b, io_num * 16);
+      replacement = nir_imm_int(b, io_num * 16);
+      break;
    }
-
    case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
       unsigned num_patches = s->info->num_tess_patches;
       unsigned out_vertices_per_patch = b->shader->info.tess.tcs_vertices_out;
       unsigned num_tcs_outputs = stage == MESA_SHADER_TESS_CTRL ? s->info->tcs.num_linked_outputs
                                                                 : s->info->tes.num_linked_inputs;
       int per_vertex_output_patch_size = out_vertices_per_patch * num_tcs_outputs * 16u;
-      return nir_imm_int(b, num_patches * per_vertex_output_patch_size);
+      replacement = nir_imm_int(b, num_patches * per_vertex_output_patch_size);
+      break;
    }
-
    default:
-      unreachable("invalid NIR RADV ABI intrinsic.");
+      break;
    }
-}
 
-static bool
-filter_abi_instr(const nir_instr *instr,
-                 UNUSED const void *state)
-{
-   lower_abi_state *s = (lower_abi_state *) state;
-
-   if (instr->type != nir_instr_type_intrinsic)
+   if (!replacement)
       return false;
 
-   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-   return (intrin->intrinsic == nir_intrinsic_load_ring_tess_factors_amd && !s->use_llvm) ||
-          (intrin->intrinsic == nir_intrinsic_load_ring_tess_offchip_amd && !s->use_llvm) ||
-          (intrin->intrinsic == nir_intrinsic_load_ring_esgs_amd && !s->use_llvm) ||
-          intrin->intrinsic == nir_intrinsic_load_ring_tess_factors_offset_amd ||
-          intrin->intrinsic == nir_intrinsic_load_ring_tess_offchip_offset_amd ||
-          intrin->intrinsic == nir_intrinsic_load_patch_vertices_in ||
-          intrin->intrinsic == nir_intrinsic_load_tcs_num_patches_amd ||
-          intrin->intrinsic == nir_intrinsic_load_ring_es2gs_offset_amd ||
-          intrin->intrinsic == nir_intrinsic_load_tess_rel_patch_id_amd ||
-          intrin->intrinsic == nir_intrinsic_load_gs_vertex_offset_amd ||
-          intrin->intrinsic == nir_intrinsic_load_workgroup_num_input_vertices_amd ||
-          intrin->intrinsic == nir_intrinsic_load_workgroup_num_input_primitives_amd ||
-          intrin->intrinsic == nir_intrinsic_load_packed_passthrough_primitive_amd ||
-          intrin->intrinsic == nir_intrinsic_load_shader_query_enabled_amd ||
-          intrin->intrinsic == nir_intrinsic_load_cull_any_enabled_amd ||
-          intrin->intrinsic == nir_intrinsic_load_cull_front_face_enabled_amd ||
-          intrin->intrinsic == nir_intrinsic_load_cull_back_face_enabled_amd ||
-          intrin->intrinsic == nir_intrinsic_load_cull_ccw_amd ||
-          intrin->intrinsic == nir_intrinsic_load_cull_small_primitives_enabled_amd ||
-          intrin->intrinsic == nir_intrinsic_load_cull_small_prim_precision_amd ||
-          intrin->intrinsic == nir_intrinsic_load_viewport_x_scale ||
-          intrin->intrinsic == nir_intrinsic_load_viewport_x_offset ||
-          intrin->intrinsic == nir_intrinsic_load_viewport_y_scale ||
-          intrin->intrinsic == nir_intrinsic_load_viewport_y_offset ||
-          intrin->intrinsic == nir_intrinsic_load_ring_task_draw_amd ||
-          intrin->intrinsic == nir_intrinsic_load_ring_task_payload_amd ||
-          intrin->intrinsic == nir_intrinsic_load_ring_mesh_scratch_amd ||
-          intrin->intrinsic == nir_intrinsic_load_ring_mesh_scratch_offset_amd ||
-          intrin->intrinsic == nir_intrinsic_load_task_ring_entry_amd ||
-          intrin->intrinsic == nir_intrinsic_load_task_ib_addr ||
-          intrin->intrinsic == nir_intrinsic_load_task_ib_stride ||
-          intrin->intrinsic == nir_intrinsic_load_lshs_vertex_stride_amd ||
-          intrin->intrinsic == nir_intrinsic_load_hs_out_patch_data_offset_amd;
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
+   nir_instr_remove(instr);
+   nir_instr_free(instr);
+
+   return true;
 }
 
 void
@@ -268,5 +253,6 @@ radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level,
       .use_llvm = use_llvm,
    };
 
-   nir_shader_lower_instructions(shader, filter_abi_instr, lower_abi_instr, &state);
+   nir_shader_instructions_pass(shader, lower_abi_instr,
+                                nir_metadata_dominance | nir_metadata_block_index, &state);
 }
-- 
2.7.4
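
For context, the contract nir_shader_instructions_pass expects is the one the
new lower_abi_instr implements: a bool-returning callback that is invoked for
every instruction, performs its own rewrite, and reports whether it made
progress; the helper walks every function and block, supplies the builder, and
keeps only the listed metadata for functions where the callback reported
progress. A minimal sketch of a pass written in this style (a hypothetical toy
pass for illustration only; lower_example_instr and radv_nir_lower_example are
made-up names, not RADV code):

   #include "nir.h"
   #include "nir_builder.h"

   /* Hypothetical toy pass: rewrite every load_sample_id intrinsic to an
    * immediate 0, following the same pattern as lower_abi_instr above. */
   static bool
   lower_example_instr(nir_builder *b, nir_instr *instr, void *data)
   {
      if (instr->type != nir_instr_type_intrinsic)
         return false;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (intrin->intrinsic != nir_intrinsic_load_sample_id)
         return false;

      /* Build the replacement value right before the lowered instruction. */
      b->cursor = nir_before_instr(instr);
      nir_ssa_def *replacement = nir_imm_int(b, 0);

      /* Unlike with nir_shader_lower_instructions, the callback rewrites
       * uses and removes the old instruction itself. */
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
      nir_instr_remove(instr);
      return true;
   }

   static bool
   radv_nir_lower_example(nir_shader *shader)
   {
      return nir_shader_instructions_pass(shader, lower_example_instr,
                                          nir_metadata_dominance | nir_metadata_block_index,
                                          NULL);
   }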