From 3a72021d7cf4486c52f60dd3a6ae776235d5b587 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 2 Nov 2020 14:01:38 +0100 Subject: [PATCH] aco: store NIR range analysis data to the isel context It will be used to optimize some ALU instructions. Signed-off-by: Samuel Pitoiset Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 2 + src/amd/compiler/aco_instruction_selection.h | 5 + .../compiler/aco_instruction_selection_setup.cpp | 137 +++++++++++---------- 3 files changed, 78 insertions(+), 66 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 73792d3..5cf3db2 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -11730,6 +11730,8 @@ void select_program(Program *program, ctx.inputs = ctx.outputs; ctx.outputs = shader_io_state(); } + + cleanup_context(&ctx); } program->config->float_mode = program->blocks[0].fp_mode.val; diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h index cdb19ea..e145c8f 100644 --- a/src/amd/compiler/aco_instruction_selection.h +++ b/src/amd/compiler/aco_instruction_selection.h @@ -84,6 +84,10 @@ struct isel_context { std::unique_ptr nir_to_aco; /* NIR block index to ACO block index */ } cf_info; + /* NIR range analysis. */ + struct hash_table *range_ht; + nir_unsigned_upper_bound_config ub_config; + uint32_t resource_flag_offsets[MAX_SETS]; std::vector buffer_resource_flags; @@ -211,6 +215,7 @@ inline bool can_subdword_ssbo_store_use_smem(nir_intrinsic_instr *intrin) } void init_context(isel_context *ctx, nir_shader *shader); +void cleanup_context(isel_context *ctx); isel_context setup_isel_context(Program* program, diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 0ce0324..e2a0148 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -249,8 +249,7 @@ void fill_desc_set_info(isel_context *ctx, nir_function_impl *impl) } } -void apply_nuw_to_ssa(nir_shader *shader, struct hash_table *range_ht, nir_ssa_def *ssa, - const nir_unsigned_upper_bound_config *config) +void apply_nuw_to_ssa(isel_context *ctx, nir_ssa_def *ssa) { nir_ssa_scalar scalar; scalar.def = ssa; @@ -273,69 +272,15 @@ void apply_nuw_to_ssa(nir_shader *shader, struct hash_table *range_ht, nir_ssa_d src1 = tmp; } - uint32_t src1_ub = nir_unsigned_upper_bound(shader, range_ht, src1, config); - add->no_unsigned_wrap = !nir_addition_might_overflow(shader, range_ht, src0, src1_ub, config); + uint32_t src1_ub = nir_unsigned_upper_bound(ctx->shader, ctx->range_ht, + src1, &ctx->ub_config); + add->no_unsigned_wrap = + !nir_addition_might_overflow(ctx->shader, ctx->range_ht, src0, src1_ub, + &ctx->ub_config); } void apply_nuw_to_offsets(isel_context *ctx, nir_function_impl *impl) { - nir_unsigned_upper_bound_config config; - config.min_subgroup_size = 64; - config.max_subgroup_size = 64; - if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && ctx->options->key.cs.subgroup_size) { - config.min_subgroup_size = ctx->options->key.cs.subgroup_size; - config.max_subgroup_size = ctx->options->key.cs.subgroup_size; - } - config.max_work_group_invocations = 2048; - config.max_work_group_count[0] = 65535; - config.max_work_group_count[1] = 65535; - config.max_work_group_count[2] = 65535; - config.max_work_group_size[0] = 2048; - config.max_work_group_size[1] = 2048; - config.max_work_group_size[2] = 2048; - for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; i++) { - unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[i]; - unsigned dfmt = attrib_format & 0xf; - unsigned nfmt = (attrib_format >> 4) & 0x7; - - uint32_t max = UINT32_MAX; - if (nfmt == V_008F0C_BUF_NUM_FORMAT_UNORM) { - max = 0x3f800000u; - } else if (nfmt == V_008F0C_BUF_NUM_FORMAT_UINT || - nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED) { - bool uscaled = nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED; - switch (dfmt) { - case V_008F0C_BUF_DATA_FORMAT_8: - case V_008F0C_BUF_DATA_FORMAT_8_8: - case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: - max = uscaled ? 0x437f0000u : UINT8_MAX; - break; - case V_008F0C_BUF_DATA_FORMAT_10_10_10_2: - case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: - max = uscaled ? 0x447fc000u : 1023; - break; - case V_008F0C_BUF_DATA_FORMAT_10_11_11: - case V_008F0C_BUF_DATA_FORMAT_11_11_10: - max = uscaled ? 0x44ffe000u : 2047; - break; - case V_008F0C_BUF_DATA_FORMAT_16: - case V_008F0C_BUF_DATA_FORMAT_16_16: - case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: - max = uscaled ? 0x477fff00u : UINT16_MAX; - break; - case V_008F0C_BUF_DATA_FORMAT_32: - case V_008F0C_BUF_DATA_FORMAT_32_32: - case V_008F0C_BUF_DATA_FORMAT_32_32_32: - case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: - max = uscaled ? 0x4f800000u : UINT32_MAX; - break; - } - } - config.vertex_attrib_max[i] = max; - } - - struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL); - nir_metadata_require(impl, nir_metadata_dominance); nir_foreach_block(block, impl) { @@ -349,24 +294,22 @@ void apply_nuw_to_offsets(isel_context *ctx, nir_function_impl *impl) case nir_intrinsic_load_uniform: case nir_intrinsic_load_push_constant: if (!nir_src_is_divergent(intrin->src[0])) - apply_nuw_to_ssa(ctx->shader, range_ht, intrin->src[0].ssa, &config); + apply_nuw_to_ssa(ctx, intrin->src[0].ssa); break; case nir_intrinsic_load_ubo: case nir_intrinsic_load_ssbo: if (!nir_src_is_divergent(intrin->src[1])) - apply_nuw_to_ssa(ctx->shader, range_ht, intrin->src[1].ssa, &config); + apply_nuw_to_ssa(ctx, intrin->src[1].ssa); break; case nir_intrinsic_store_ssbo: if (!nir_src_is_divergent(intrin->src[2])) - apply_nuw_to_ssa(ctx->shader, range_ht, intrin->src[2].ssa, &config); + apply_nuw_to_ssa(ctx, intrin->src[2].ssa); break; default: break; } } } - - _mesa_hash_table_destroy(range_ht, NULL); } RegClass get_reg_class(isel_context *ctx, RegType type, unsigned components, unsigned bitsize) @@ -634,6 +577,63 @@ void init_context(isel_context *ctx, nir_shader *shader) unsigned lane_mask_size = ctx->program->lane_mask.size(); ctx->shader = shader; + + /* Init NIR range analysis. */ + ctx->range_ht =_mesa_pointer_hash_table_create(NULL); + ctx->ub_config.min_subgroup_size = 64; + ctx->ub_config.max_subgroup_size = 64; + if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && ctx->options->key.cs.subgroup_size) { + ctx->ub_config.min_subgroup_size = ctx->options->key.cs.subgroup_size; + ctx->ub_config.max_subgroup_size = ctx->options->key.cs.subgroup_size; + } + ctx->ub_config.max_work_group_invocations = 2048; + ctx->ub_config.max_work_group_count[0] = 65535; + ctx->ub_config.max_work_group_count[1] = 65535; + ctx->ub_config.max_work_group_count[2] = 65535; + ctx->ub_config.max_work_group_size[0] = 2048; + ctx->ub_config.max_work_group_size[1] = 2048; + ctx->ub_config.max_work_group_size[2] = 2048; + for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; i++) { + unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[i]; + unsigned dfmt = attrib_format & 0xf; + unsigned nfmt = (attrib_format >> 4) & 0x7; + + uint32_t max = UINT32_MAX; + if (nfmt == V_008F0C_BUF_NUM_FORMAT_UNORM) { + max = 0x3f800000u; + } else if (nfmt == V_008F0C_BUF_NUM_FORMAT_UINT || + nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED) { + bool uscaled = nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED; + switch (dfmt) { + case V_008F0C_BUF_DATA_FORMAT_8: + case V_008F0C_BUF_DATA_FORMAT_8_8: + case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: + max = uscaled ? 0x437f0000u : UINT8_MAX; + break; + case V_008F0C_BUF_DATA_FORMAT_10_10_10_2: + case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: + max = uscaled ? 0x447fc000u : 1023; + break; + case V_008F0C_BUF_DATA_FORMAT_10_11_11: + case V_008F0C_BUF_DATA_FORMAT_11_11_10: + max = uscaled ? 0x44ffe000u : 2047; + break; + case V_008F0C_BUF_DATA_FORMAT_16: + case V_008F0C_BUF_DATA_FORMAT_16_16: + case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: + max = uscaled ? 0x477fff00u : UINT16_MAX; + break; + case V_008F0C_BUF_DATA_FORMAT_32: + case V_008F0C_BUF_DATA_FORMAT_32_32: + case V_008F0C_BUF_DATA_FORMAT_32_32_32: + case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: + max = uscaled ? 0x4f800000u : UINT32_MAX; + break; + } + } + ctx->ub_config.vertex_attrib_max[i] = max; + } + nir_divergence_analysis(shader); nir_opt_uniform_atomics(shader); @@ -1015,6 +1015,11 @@ void init_context(isel_context *ctx, nir_shader *shader) (uint8_t*)shader->constant_data + shader->constant_data_size); } +void cleanup_context(isel_context *ctx) +{ + _mesa_hash_table_destroy(ctx->range_ht, NULL); +} + isel_context setup_isel_context(Program* program, unsigned shader_count, -- 2.7.4