From: Rhys Perry Date: Thu, 6 Jan 2022 19:07:37 +0000 (+0000) Subject: ac/nir: use shorter builder names X-Git-Tag: upstream/22.3.5~13660 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9e171b6d495e1232721c1591be8a53e353d51d91;p=platform%2Fupstream%2Fmesa.git ac/nir: use shorter builder names This makes a lot of lines shorter. Signed-off-by: Rhys Perry Reviewed-by: Timur Kristóf Reviewed-by: Emma Anholt Part-of: --- diff --git a/src/amd/common/ac_nir_cull.c b/src/amd/common/ac_nir_cull.c index 26e1f65..8765de3 100644 --- a/src/amd/common/ac_nir_cull.c +++ b/src/amd/common/ac_nir_cull.c @@ -70,10 +70,10 @@ cull_face(nir_builder *b, nir_ssa_def *pos[3][4], const position_w_info *w_info) nir_ssa_def *front_facing_cw = nir_flt(b, det, nir_imm_float(b, 0.0f)); nir_ssa_def *front_facing_ccw = nir_flt(b, nir_imm_float(b, 0.0f), det); - nir_ssa_def *ccw = nir_build_load_cull_ccw_amd(b); + nir_ssa_def *ccw = nir_load_cull_ccw_amd(b); nir_ssa_def *front_facing = nir_bcsel(b, ccw, front_facing_ccw, front_facing_cw); - nir_ssa_def *cull_front = nir_build_load_cull_front_face_enabled_amd(b); - nir_ssa_def *cull_back = nir_build_load_cull_back_face_enabled_amd(b); + nir_ssa_def *cull_front = nir_load_cull_front_face_enabled_amd(b); + nir_ssa_def *cull_back = nir_load_cull_back_face_enabled_amd(b); nir_ssa_def *face_culled = nir_bcsel(b, front_facing, cull_front, cull_back); @@ -100,8 +100,8 @@ cull_bbox(nir_builder *b, nir_ssa_def *pos[3][4], nir_ssa_def *accepted, const p bbox_max[chan] = nir_fmax(b, pos[0][chan], nir_fmax(b, pos[1][chan], pos[2][chan])); } - nir_ssa_def *vp_scale[2] = { nir_build_load_viewport_x_scale(b), nir_build_load_viewport_y_scale(b), }; - nir_ssa_def *vp_translate[2] = { nir_build_load_viewport_x_offset(b), nir_build_load_viewport_y_offset(b), }; + nir_ssa_def *vp_scale[2] = { nir_load_viewport_x_scale(b), nir_load_viewport_y_scale(b), }; + nir_ssa_def *vp_translate[2] = { nir_load_viewport_x_offset(b), nir_load_viewport_y_offset(b), }; nir_ssa_def *prim_outside_view = nir_imm_false(b); /* Frustrum culling - eliminate triangles that are fully outside the view. */ @@ -114,9 +114,9 @@ cull_bbox(nir_builder *b, nir_ssa_def *pos[3][4], nir_ssa_def *accepted, const p nir_ssa_def *prim_is_small_else = nir_imm_false(b); /* Small primitive filter - eliminate triangles that are too small to affect a sample. */ - nir_if *if_cull_small_prims = nir_push_if(b, nir_build_load_cull_small_primitives_enabled_amd(b)); + nir_if *if_cull_small_prims = nir_push_if(b, nir_load_cull_small_primitives_enabled_amd(b)); { - nir_ssa_def *small_prim_precision = nir_build_load_cull_small_prim_precision_amd(b); + nir_ssa_def *small_prim_precision = nir_load_cull_small_prim_precision_amd(b); prim_is_small = nir_imm_false(b); for (unsigned chan = 0; chan < 2; ++chan) { diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index 1556378..5b805f0 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -189,7 +189,7 @@ summarize_repack(nir_builder *b, nir_ssa_def *packed_counts, unsigned num_lds_dw nir_ssa_def *dot_op = !use_dot ? NULL : nir_ushr(b, nir_ushr(b, nir_imm_int(b, 0x01010101), shift), shift); /* Broadcast the packed data we read from LDS (to the first 16 lanes, but we only care up to num_waves). */ - nir_ssa_def *packed = nir_build_lane_permute_16_amd(b, packed_counts, nir_imm_int(b, 0), nir_imm_int(b, 0)); + nir_ssa_def *packed = nir_lane_permute_16_amd(b, packed_counts, nir_imm_int(b, 0), nir_imm_int(b, 0)); /* Horizontally add the packed bytes. */ if (use_dot) { @@ -202,8 +202,8 @@ summarize_repack(nir_builder *b, nir_ssa_def *packed_counts, unsigned num_lds_dw nir_ssa_def *dot_op = !use_dot ? NULL : nir_ushr(b, nir_ushr(b, nir_imm_int64(b, 0x0101010101010101), shift), shift); /* Broadcast the packed data we read from LDS (to the first 16 lanes, but we only care up to num_waves). */ - nir_ssa_def *packed_dw0 = nir_build_lane_permute_16_amd(b, nir_unpack_64_2x32_split_x(b, packed_counts), nir_imm_int(b, 0), nir_imm_int(b, 0)); - nir_ssa_def *packed_dw1 = nir_build_lane_permute_16_amd(b, nir_unpack_64_2x32_split_y(b, packed_counts), nir_imm_int(b, 0), nir_imm_int(b, 0)); + nir_ssa_def *packed_dw0 = nir_lane_permute_16_amd(b, nir_unpack_64_2x32_split_x(b, packed_counts), nir_imm_int(b, 0), nir_imm_int(b, 0)); + nir_ssa_def *packed_dw1 = nir_lane_permute_16_amd(b, nir_unpack_64_2x32_split_y(b, packed_counts), nir_imm_int(b, 0), nir_imm_int(b, 0)); /* Horizontally add the packed bytes. */ if (use_dot) { @@ -238,14 +238,14 @@ repack_invocations_in_workgroup(nir_builder *b, nir_ssa_def *input_bool, * Implemented by a scalar instruction that simply counts the number of bits set in a 32/64-bit mask. */ - nir_ssa_def *input_mask = nir_build_ballot(b, 1, wave_size, input_bool); + nir_ssa_def *input_mask = nir_ballot(b, 1, wave_size, input_bool); nir_ssa_def *surviving_invocations_in_current_wave = nir_bit_count(b, input_mask); /* If we know at compile time that the workgroup has only 1 wave, no further steps are necessary. */ if (max_num_waves == 1) { wg_repack_result r = { .num_repacked_invocations = surviving_invocations_in_current_wave, - .repacked_invocation_index = nir_build_mbcnt_amd(b, input_mask, nir_imm_int(b, 0)), + .repacked_invocation_index = nir_mbcnt_amd(b, input_mask, nir_imm_int(b, 0)), }; return r; } @@ -263,16 +263,16 @@ repack_invocations_in_workgroup(nir_builder *b, nir_ssa_def *input_bool, const unsigned num_lds_dwords = DIV_ROUND_UP(max_num_waves, 4); assert(num_lds_dwords <= 2); - nir_ssa_def *wave_id = nir_build_load_subgroup_id(b); + nir_ssa_def *wave_id = nir_load_subgroup_id(b); nir_ssa_def *dont_care = nir_ssa_undef(b, 1, num_lds_dwords * 32); - nir_if *if_first_lane = nir_push_if(b, nir_build_elect(b, 1)); + nir_if *if_first_lane = nir_push_if(b, nir_elect(b, 1)); - nir_build_store_shared(b, nir_u2u8(b, surviving_invocations_in_current_wave), wave_id, .base = lds_addr_base); + nir_store_shared(b, nir_u2u8(b, surviving_invocations_in_current_wave), wave_id, .base = lds_addr_base); nir_scoped_barrier(b, .execution_scope=NIR_SCOPE_WORKGROUP, .memory_scope=NIR_SCOPE_WORKGROUP, .memory_semantics=NIR_MEMORY_ACQ_REL, .memory_modes=nir_var_mem_shared); - nir_ssa_def *packed_counts = nir_build_load_shared(b, 1, num_lds_dwords * 32, nir_imm_int(b, 0), .base = lds_addr_base, .align_mul = 8u); + nir_ssa_def *packed_counts = nir_load_shared(b, 1, num_lds_dwords * 32, nir_imm_int(b, 0), .base = lds_addr_base, .align_mul = 8u); nir_pop_if(b, if_first_lane); @@ -293,12 +293,12 @@ repack_invocations_in_workgroup(nir_builder *b, nir_ssa_def *input_bool, * This is the total number of surviving invocations in the workgroup. */ - nir_ssa_def *num_waves = nir_build_load_num_subgroups(b); + nir_ssa_def *num_waves = nir_load_num_subgroups(b); nir_ssa_def *sum = summarize_repack(b, packed_counts, num_lds_dwords); - nir_ssa_def *wg_repacked_index_base = nir_build_read_invocation(b, sum, wave_id); - nir_ssa_def *wg_num_repacked_invocations = nir_build_read_invocation(b, sum, num_waves); - nir_ssa_def *wg_repacked_index = nir_build_mbcnt_amd(b, input_mask, wg_repacked_index_base); + nir_ssa_def *wg_repacked_index_base = nir_read_invocation(b, sum, wave_id); + nir_ssa_def *wg_num_repacked_invocations = nir_read_invocation(b, sum, num_waves); + nir_ssa_def *wg_repacked_index = nir_mbcnt_amd(b, input_mask, wg_repacked_index_base); wg_repack_result r = { .num_repacked_invocations = wg_num_repacked_invocations, @@ -320,7 +320,7 @@ emit_pack_ngg_prim_exp_arg(nir_builder *b, unsigned num_vertices_per_primitives, bool use_edgeflags) { nir_ssa_def *arg = use_edgeflags - ? nir_build_load_initial_edgeflags_amd(b) + ? nir_load_initial_edgeflags_amd(b) : nir_imm_int(b, 0); for (unsigned i = 0; i < num_vertices_per_primitives; ++i) { @@ -341,7 +341,7 @@ emit_pack_ngg_prim_exp_arg(nir_builder *b, unsigned num_vertices_per_primitives, static nir_ssa_def * ngg_input_primitive_vertex_index(nir_builder *b, unsigned vertex) { - return nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = vertex / 2u), + return nir_ubfe(b, nir_load_gs_vertex_offset_amd(b, .base = vertex / 2u), nir_imm_int(b, (vertex & 1u) * 16u), nir_imm_int(b, 16u)); } @@ -350,7 +350,7 @@ emit_ngg_nogs_prim_exp_arg(nir_builder *b, lower_ngg_nogs_state *st) { if (st->passthrough) { assert(!st->export_prim_id || b->shader->info.stage != MESA_SHADER_VERTEX); - return nir_build_load_packed_passthrough_primitive_amd(b); + return nir_load_packed_passthrough_primitive_amd(b); } else { nir_ssa_def *vtx_idx[3] = {0}; @@ -371,7 +371,7 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def { nir_ssa_def *gs_thread = st->gs_accepted_var ? nir_load_var(b, st->gs_accepted_var) - : nir_build_has_input_primitive_amd(b); + : nir_has_input_primitive_amd(b); nir_if *if_gs_thread = nir_push_if(b, gs_thread); { @@ -380,14 +380,14 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def if (st->export_prim_id && b->shader->info.stage == MESA_SHADER_VERTEX) { /* Copy Primitive IDs from GS threads to the LDS address corresponding to the ES thread of the provoking vertex. */ - nir_ssa_def *prim_id = nir_build_load_primitive_id(b); + nir_ssa_def *prim_id = nir_load_primitive_id(b); nir_ssa_def *provoking_vtx_idx = ngg_input_primitive_vertex_index(b, st->provoking_vtx_idx); nir_ssa_def *addr = pervertex_lds_addr(b, provoking_vtx_idx, 4u); - nir_build_store_shared(b, prim_id, addr); + nir_store_shared(b, prim_id, addr); } - nir_build_export_primitive_amd(b, arg); + nir_export_primitive_amd(b, arg); } nir_pop_if(b, if_gs_thread); } @@ -403,14 +403,14 @@ emit_store_ngg_nogs_es_primitive_id(nir_builder *b) .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared); /* LDS address where the primitive ID is stored */ - nir_ssa_def *thread_id_in_threadgroup = nir_build_load_local_invocation_index(b); + nir_ssa_def *thread_id_in_threadgroup = nir_load_local_invocation_index(b); nir_ssa_def *addr = pervertex_lds_addr(b, thread_id_in_threadgroup, 4u); /* Load primitive ID from LDS */ - prim_id = nir_build_load_shared(b, 1, 32, addr); + prim_id = nir_load_shared(b, 1, 32, addr); } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) { /* Just use tess eval primitive ID, which is the same as the patch ID. */ - prim_id = nir_build_load_primitive_id(b); + prim_id = nir_load_primitive_id(b); } nir_io_semantics io_sem = { @@ -418,9 +418,9 @@ emit_store_ngg_nogs_es_primitive_id(nir_builder *b) .num_slots = 1, }; - nir_build_store_output(b, prim_id, nir_imm_zero(b, 1, 32), - .base = io_sem.location, - .src_type = nir_type_uint32, .io_semantics = io_sem); + nir_store_output(b, prim_id, nir_imm_zero(b, 1, 32), + .base = io_sem.location, + .src_type = nir_type_uint32, .io_semantics = io_sem); } static bool @@ -715,16 +715,16 @@ compact_vertices_after_culling(nir_builder *b, nir_ssa_def *exporter_addr = pervertex_lds_addr(b, es_exporter_tid, pervertex_lds_bytes); /* Store the exporter thread's index to the LDS space of the current thread so GS threads can load it */ - nir_build_store_shared(b, nir_u2u8(b, es_exporter_tid), es_vertex_lds_addr, .base = lds_es_exporter_tid); + nir_store_shared(b, nir_u2u8(b, es_exporter_tid), es_vertex_lds_addr, .base = lds_es_exporter_tid); /* Store the current thread's position output to the exporter thread's LDS space */ nir_ssa_def *pos = nir_load_var(b, position_value_var); - nir_build_store_shared(b, pos, exporter_addr, .base = lds_es_pos_x); + nir_store_shared(b, pos, exporter_addr, .base = lds_es_pos_x); /* Store the current thread's repackable arguments to the exporter thread's LDS space */ for (unsigned i = 0; i < max_exported_args; ++i) { nir_ssa_def *arg_val = nir_load_var(b, repacked_arg_vars[i]); - nir_intrinsic_instr *store = nir_build_store_shared(b, arg_val, exporter_addr, .base = lds_es_arg_0 + 4u * i); + nir_intrinsic_instr *store = nir_store_shared(b, arg_val, exporter_addr, .base = lds_es_arg_0 + 4u * i); nogs_state->compact_arg_stores[i] = &store->instr; } @@ -742,12 +742,12 @@ compact_vertices_after_culling(nir_builder *b, nir_if *if_packed_es_thread = nir_push_if(b, es_survived); { /* Read position from the current ES thread's LDS space (written by the exported vertex's ES thread) */ - nir_ssa_def *exported_pos = nir_build_load_shared(b, 4, 32, es_vertex_lds_addr, .base = lds_es_pos_x); + nir_ssa_def *exported_pos = nir_load_shared(b, 4, 32, es_vertex_lds_addr, .base = lds_es_pos_x); nir_store_var(b, position_value_var, exported_pos, 0xfu); /* Read the repacked arguments */ for (unsigned i = 0; i < max_exported_args; ++i) { - nir_ssa_def *arg_val = nir_build_load_shared(b, 1, 32, es_vertex_lds_addr, .base = lds_es_arg_0 + 4u * i); + nir_ssa_def *arg_val = nir_load_shared(b, 1, 32, es_vertex_lds_addr, .base = lds_es_arg_0 + 4u * i); nir_store_var(b, repacked_arg_vars[i], arg_val, 0x1u); } } @@ -766,7 +766,7 @@ compact_vertices_after_culling(nir_builder *b, /* Load the index of the ES threads that will export the current GS thread's vertices */ for (unsigned v = 0; v < 3; ++v) { nir_ssa_def *vtx_addr = nir_load_var(b, gs_vtxaddr_vars[v]); - nir_ssa_def *exporter_vtx_idx = nir_build_load_shared(b, 1, 8, vtx_addr, .base = lds_es_exporter_tid); + nir_ssa_def *exporter_vtx_idx = nir_load_shared(b, 1, 8, vtx_addr, .base = lds_es_exporter_tid); exporter_vtx_indices[v] = nir_u2u32(b, exporter_vtx_idx); } @@ -776,7 +776,7 @@ compact_vertices_after_culling(nir_builder *b, nir_pop_if(b, if_gs_accepted); nir_store_var(b, es_accepted_var, es_survived, 0x1u); - nir_store_var(b, gs_accepted_var, nir_bcsel(b, fully_culled, nir_imm_false(b), nir_build_has_input_primitive_amd(b)), 0x1u); + nir_store_var(b, gs_accepted_var, nir_bcsel(b, fully_culled, nir_imm_false(b), nir_has_input_primitive_amd(b)), 0x1u); } static void @@ -1081,7 +1081,7 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c b->cursor = nir_before_cf_list(&impl->body); - nir_ssa_def *es_thread = nir_build_has_input_vertex_amd(b); + nir_ssa_def *es_thread = nir_has_input_vertex_amd(b); nir_if *if_es_thread = nir_push_if(b, es_thread); { /* Initialize the position output variable to zeroes, in case not all VS/TES invocations store the output. @@ -1097,16 +1097,16 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c /* Remember the current thread's shader arguments */ if (b->shader->info.stage == MESA_SHADER_VERTEX) { - nir_store_var(b, repacked_arg_vars[0], nir_build_load_vertex_id_zero_base(b), 0x1u); + nir_store_var(b, repacked_arg_vars[0], nir_load_vertex_id_zero_base(b), 0x1u); if (uses_instance_id) - nir_store_var(b, repacked_arg_vars[1], nir_build_load_instance_id(b), 0x1u); + nir_store_var(b, repacked_arg_vars[1], nir_load_instance_id(b), 0x1u); } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) { - nir_ssa_def *tess_coord = nir_build_load_tess_coord(b); + nir_ssa_def *tess_coord = nir_load_tess_coord(b); nir_store_var(b, repacked_arg_vars[0], nir_channel(b, tess_coord, 0), 0x1u); nir_store_var(b, repacked_arg_vars[1], nir_channel(b, tess_coord, 1), 0x1u); - nir_store_var(b, repacked_arg_vars[2], nir_build_load_tess_rel_patch_id_amd(b), 0x1u); + nir_store_var(b, repacked_arg_vars[2], nir_load_tess_rel_patch_id_amd(b), 0x1u); if (uses_tess_primitive_id) - nir_store_var(b, repacked_arg_vars[3], nir_build_load_primitive_id(b), 0x1u); + nir_store_var(b, repacked_arg_vars[3], nir_load_primitive_id(b), 0x1u); } else { unreachable("Should be VS or TES."); } @@ -1114,7 +1114,7 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c nir_pop_if(b, if_es_thread); nir_store_var(b, es_accepted_var, es_thread, 0x1u); - nir_store_var(b, gs_accepted_var, nir_build_has_input_primitive_amd(b), 0x1u); + nir_store_var(b, gs_accepted_var, nir_has_input_primitive_amd(b), 0x1u); /* Remove all non-position outputs, and put the position output into the variable. */ nir_metadata_preserve(impl, nir_metadata_none); @@ -1128,24 +1128,24 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c * by the following NIR intrinsic. */ - nir_if *if_cull_en = nir_push_if(b, nir_build_load_cull_any_enabled_amd(b)); + nir_if *if_cull_en = nir_push_if(b, nir_load_cull_any_enabled_amd(b)); { - nir_ssa_def *invocation_index = nir_build_load_local_invocation_index(b); + nir_ssa_def *invocation_index = nir_load_local_invocation_index(b); nir_ssa_def *es_vertex_lds_addr = pervertex_lds_addr(b, invocation_index, pervertex_lds_bytes); /* ES invocations store their vertex data to LDS for GS threads to read. */ - if_es_thread = nir_push_if(b, nir_build_has_input_vertex_amd(b)); + if_es_thread = nir_push_if(b, nir_has_input_vertex_amd(b)); { /* Store position components that are relevant to culling in LDS */ nir_ssa_def *pre_cull_pos = nir_load_var(b, position_value_var); nir_ssa_def *pre_cull_w = nir_channel(b, pre_cull_pos, 3); - nir_build_store_shared(b, pre_cull_w, es_vertex_lds_addr, .base = lds_es_pos_w); + nir_store_shared(b, pre_cull_w, es_vertex_lds_addr, .base = lds_es_pos_w); nir_ssa_def *pre_cull_x_div_w = nir_fdiv(b, nir_channel(b, pre_cull_pos, 0), pre_cull_w); nir_ssa_def *pre_cull_y_div_w = nir_fdiv(b, nir_channel(b, pre_cull_pos, 1), pre_cull_w); - nir_build_store_shared(b, nir_vec2(b, pre_cull_x_div_w, pre_cull_y_div_w), es_vertex_lds_addr, .base = lds_es_pos_x); + nir_store_shared(b, nir_vec2(b, pre_cull_x_div_w, pre_cull_y_div_w), es_vertex_lds_addr, .base = lds_es_pos_x); /* Clear out the ES accepted flag in LDS */ - nir_build_store_shared(b, nir_imm_zero(b, 1, 8), es_vertex_lds_addr, .align_mul = 4, .base = lds_es_vertex_accepted); + nir_store_shared(b, nir_imm_zero(b, 1, 8), es_vertex_lds_addr, .align_mul = 4, .base = lds_es_vertex_accepted); } nir_pop_if(b, if_es_thread); @@ -1156,7 +1156,7 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c nir_store_var(b, prim_exp_arg_var, nir_imm_int(b, 1u << 31), 0x1u); /* GS invocations load the vertex data and perform the culling. */ - nir_if *if_gs_thread = nir_push_if(b, nir_build_has_input_primitive_amd(b)); + nir_if *if_gs_thread = nir_push_if(b, nir_has_input_primitive_amd(b)); { /* Load vertex indices from input VGPRs */ nir_ssa_def *vtx_idx[3] = {0}; @@ -1169,13 +1169,13 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c /* Load W positions of vertices first because the culling code will use these first */ for (unsigned vtx = 0; vtx < 3; ++vtx) { vtx_addr[vtx] = pervertex_lds_addr(b, vtx_idx[vtx], pervertex_lds_bytes); - pos[vtx][3] = nir_build_load_shared(b, 1, 32, vtx_addr[vtx], .base = lds_es_pos_w); + pos[vtx][3] = nir_load_shared(b, 1, 32, vtx_addr[vtx], .base = lds_es_pos_w); nir_store_var(b, gs_vtxaddr_vars[vtx], vtx_addr[vtx], 0x1u); } /* Load the X/W, Y/W positions of vertices */ for (unsigned vtx = 0; vtx < 3; ++vtx) { - nir_ssa_def *xy = nir_build_load_shared(b, 2, 32, vtx_addr[vtx], .base = lds_es_pos_x); + nir_ssa_def *xy = nir_load_shared(b, 2, 32, vtx_addr[vtx], .base = lds_es_pos_x); pos[vtx][0] = nir_channel(b, xy, 0); pos[vtx][1] = nir_channel(b, xy, 1); } @@ -1188,7 +1188,7 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c { /* Store the accepted state to LDS for ES threads */ for (unsigned vtx = 0; vtx < 3; ++vtx) - nir_build_store_shared(b, nir_imm_intN_t(b, 0xff, 8), vtx_addr[vtx], .base = lds_es_vertex_accepted, .align_mul = 4u); + nir_store_shared(b, nir_imm_intN_t(b, 0xff, 8), vtx_addr[vtx], .base = lds_es_vertex_accepted, .align_mul = 4u); } nir_pop_if(b, if_gs_accepted); } @@ -1200,9 +1200,9 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c nir_store_var(b, es_accepted_var, nir_imm_bool(b, false), 0x1u); /* ES invocations load their accepted flag from LDS. */ - if_es_thread = nir_push_if(b, nir_build_has_input_vertex_amd(b)); + if_es_thread = nir_push_if(b, nir_has_input_vertex_amd(b)); { - nir_ssa_def *accepted = nir_build_load_shared(b, 1, 8u, es_vertex_lds_addr, .base = lds_es_vertex_accepted, .align_mul = 4u); + nir_ssa_def *accepted = nir_load_shared(b, 1, 8u, es_vertex_lds_addr, .base = lds_es_vertex_accepted, .align_mul = 4u); nir_ssa_def *accepted_bool = nir_ine(b, accepted, nir_imm_intN_t(b, 0, 8)); nir_store_var(b, es_accepted_var, accepted_bool, 0x1u); } @@ -1217,14 +1217,14 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c nir_ssa_def *es_exporter_tid = rep.repacked_invocation_index; /* If all vertices are culled, set primitive count to 0 as well. */ - nir_ssa_def *num_exported_prims = nir_build_load_workgroup_num_input_primitives_amd(b); + nir_ssa_def *num_exported_prims = nir_load_workgroup_num_input_primitives_amd(b); nir_ssa_def *fully_culled = nir_ieq_imm(b, num_live_vertices_in_workgroup, 0u); num_exported_prims = nir_bcsel(b, fully_culled, nir_imm_int(b, 0u), num_exported_prims); - nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_int(b, 0))); + nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_load_subgroup_id(b), nir_imm_int(b, 0))); { /* Tell the final vertex and primitive count to the HW. */ - nir_build_alloc_vertices_and_primitives_amd(b, num_live_vertices_in_workgroup, num_exported_prims); + nir_alloc_vertices_and_primitives_amd(b, num_live_vertices_in_workgroup, num_exported_prims); } nir_pop_if(b, if_wave_0); @@ -1238,11 +1238,11 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c nir_push_else(b, if_cull_en); { /* When culling is disabled, we do the same as we would without culling. */ - nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_int(b, 0))); + nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_load_subgroup_id(b), nir_imm_int(b, 0))); { - nir_ssa_def *vtx_cnt = nir_build_load_workgroup_num_input_vertices_amd(b); - nir_ssa_def *prim_cnt = nir_build_load_workgroup_num_input_primitives_amd(b); - nir_build_alloc_vertices_and_primitives_amd(b, vtx_cnt, prim_cnt); + nir_ssa_def *vtx_cnt = nir_load_workgroup_num_input_vertices_amd(b); + nir_ssa_def *prim_cnt = nir_load_workgroup_num_input_primitives_amd(b); + nir_alloc_vertices_and_primitives_amd(b, vtx_cnt, prim_cnt); } nir_pop_if(b, if_wave_0); nir_store_var(b, prim_exp_arg_var, emit_ngg_nogs_prim_exp_arg(b, nogs_state), 0x1u); @@ -1268,11 +1268,11 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c if (b->shader->info.stage == MESA_SHADER_VERTEX) nogs_state->overwrite_args = - nir_build_overwrite_vs_arguments_amd(b, + nir_overwrite_vs_arguments_amd(b, nir_load_var(b, repacked_arg_vars[0]), nir_load_var(b, repacked_arg_vars[1])); else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) nogs_state->overwrite_args = - nir_build_overwrite_tes_arguments_amd(b, + nir_overwrite_tes_arguments_amd(b, nir_load_var(b, repacked_arg_vars[0]), nir_load_var(b, repacked_arg_vars[1]), nir_load_var(b, repacked_arg_vars[2]), nir_load_var(b, repacked_arg_vars[3])); else @@ -1341,11 +1341,11 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, if (!can_cull) { /* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space */ - nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_int(b, 0))); + nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_load_subgroup_id(b), nir_imm_int(b, 0))); { - nir_ssa_def *vtx_cnt = nir_build_load_workgroup_num_input_vertices_amd(b); - nir_ssa_def *prim_cnt = nir_build_load_workgroup_num_input_primitives_amd(b); - nir_build_alloc_vertices_and_primitives_amd(b, vtx_cnt, prim_cnt); + nir_ssa_def *vtx_cnt = nir_load_workgroup_num_input_vertices_amd(b); + nir_ssa_def *prim_cnt = nir_load_workgroup_num_input_primitives_amd(b); + nir_alloc_vertices_and_primitives_amd(b, vtx_cnt, prim_cnt); } nir_pop_if(b, if_wave_0); @@ -1363,7 +1363,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, } nir_intrinsic_instr *export_vertex_instr; - nir_ssa_def *es_thread = can_cull ? nir_load_var(b, es_accepted_var) : nir_build_has_input_vertex_amd(b); + nir_ssa_def *es_thread = can_cull ? nir_load_var(b, es_accepted_var) : nir_has_input_vertex_amd(b); nir_if *if_es_thread = nir_push_if(b, es_thread); { @@ -1372,7 +1372,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, b->cursor = nir_after_cf_list(&if_es_thread->then_list); /* Export all vertex attributes (except primitive ID) */ - export_vertex_instr = nir_build_export_vertex_amd(b); + export_vertex_instr = nir_export_vertex_amd(b); /* Export primitive ID (in case of early primitive export or TES) */ if (state.export_prim_id && (state.early_prim_export || shader->info.stage != MESA_SHADER_VERTEX)) @@ -1384,7 +1384,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, if (!state.early_prim_export) { emit_ngg_nogs_prim_export(b, &state, nir_load_var(b, prim_exp_arg_var)); if (state.export_prim_id && shader->info.stage == MESA_SHADER_VERTEX) { - if_es_thread = nir_push_if(b, can_cull ? es_thread : nir_build_has_input_vertex_amd(b)); + if_es_thread = nir_push_if(b, can_cull ? es_thread : nir_has_input_vertex_amd(b)); emit_store_ngg_nogs_es_primitive_id(b); nir_pop_if(b, if_es_thread); } @@ -1405,7 +1405,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, nir_ssa_def *pos_val = nir_load_var(b, state.position_value_var); nir_io_semantics io_sem = { .location = VARYING_SLOT_POS, .num_slots = 1 }; - nir_build_store_output(b, pos_val, nir_imm_int(b, 0), .base = VARYING_SLOT_POS, .component = 0, .io_semantics = io_sem); + nir_store_output(b, pos_val, nir_imm_int(b, 0), .base = VARYING_SLOT_POS, .component = 0, .io_semantics = io_sem); } nir_metadata_preserve(impl, nir_metadata_none); @@ -1457,7 +1457,7 @@ ngg_gs_out_vertex_addr(nir_builder *b, nir_ssa_def *out_vtx_idx, lower_ngg_gs_st static nir_ssa_def * ngg_gs_emit_vertex_addr(nir_builder *b, nir_ssa_def *gs_vtx_idx, lower_ngg_gs_state *s) { - nir_ssa_def *tid_in_tg = nir_build_load_local_invocation_index(b); + nir_ssa_def *tid_in_tg = nir_load_local_invocation_index(b); nir_ssa_def *gs_out_vtx_base = nir_imul_imm(b, tid_in_tg, b->shader->info.gs.vertices_out); nir_ssa_def *out_vtx_idx = nir_iadd_nuw(b, gs_out_vtx_base, gs_vtx_idx); @@ -1480,7 +1480,7 @@ ngg_gs_clear_primflags(nir_builder *b, nir_ssa_def *num_vertices, unsigned strea nir_push_else(b, if_break); { nir_ssa_def *emit_vtx_addr = ngg_gs_emit_vertex_addr(b, current_clear_primflag_idx, s); - nir_build_store_shared(b, zero_u8, emit_vtx_addr, .base = s->lds_offs_primflags + stream); + nir_store_shared(b, zero_u8, emit_vtx_addr, .base = s->lds_offs_primflags + stream); nir_store_var(b, s->current_clear_primflag_idx_var, nir_iadd_imm_nuw(b, current_clear_primflag_idx, 1), 0x1u); } nir_pop_if(b, if_break); @@ -1491,7 +1491,7 @@ ngg_gs_clear_primflags(nir_builder *b, nir_ssa_def *num_vertices, unsigned strea static void ngg_gs_shader_query(nir_builder *b, nir_intrinsic_instr *intrin, lower_ngg_gs_state *s) { - nir_if *if_shader_query = nir_push_if(b, nir_build_load_shader_query_enabled_amd(b)); + nir_if *if_shader_query = nir_push_if(b, nir_load_shader_query_enabled_amd(b)); nir_ssa_def *num_prims_in_wave = NULL; /* Calculate the "real" number of emitted primitives from the emitted GS vertices and primitives. @@ -1502,19 +1502,19 @@ ngg_gs_shader_query(nir_builder *b, nir_intrinsic_instr *intrin, lower_ngg_gs_st unsigned gs_vtx_cnt = nir_src_as_uint(intrin->src[0]); unsigned gs_prm_cnt = nir_src_as_uint(intrin->src[1]); unsigned total_prm_cnt = gs_vtx_cnt - gs_prm_cnt * (s->num_vertices_per_primitive - 1u); - nir_ssa_def *num_threads = nir_bit_count(b, nir_build_ballot(b, 1, s->wave_size, nir_imm_bool(b, true))); + nir_ssa_def *num_threads = nir_bit_count(b, nir_ballot(b, 1, s->wave_size, nir_imm_bool(b, true))); num_prims_in_wave = nir_imul_imm(b, num_threads, total_prm_cnt); } else { nir_ssa_def *gs_vtx_cnt = intrin->src[0].ssa; nir_ssa_def *prm_cnt = intrin->src[1].ssa; if (s->num_vertices_per_primitive > 1) prm_cnt = nir_iadd_nuw(b, nir_imul_imm(b, prm_cnt, -1u * (s->num_vertices_per_primitive - 1)), gs_vtx_cnt); - num_prims_in_wave = nir_build_reduce(b, prm_cnt, .reduction_op = nir_op_iadd); + num_prims_in_wave = nir_reduce(b, prm_cnt, .reduction_op = nir_op_iadd); } /* Store the query result to GDS using an atomic add. */ - nir_if *if_first_lane = nir_push_if(b, nir_build_elect(b, 1)); - nir_build_gds_atomic_add_amd(b, 32, num_prims_in_wave, nir_imm_int(b, 0), nir_imm_int(b, 0x100)); + nir_if *if_first_lane = nir_push_if(b, nir_elect(b, 1)); + nir_gds_atomic_add_amd(b, 32, num_prims_in_wave, nir_imm_int(b, 0), nir_imm_int(b, 0x100)); nir_pop_if(b, if_first_lane); nir_pop_if(b, if_shader_query); @@ -1604,7 +1604,7 @@ lower_ngg_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *intri if (info->bit_size != 32) out_val = nir_u2u(b, out_val, info->bit_size); - nir_build_store_shared(b, out_val, gs_emit_vtx_addr, .base = packed_location * 16 + comp * 4); + nir_store_shared(b, out_val, gs_emit_vtx_addr, .base = packed_location * 16 + comp * 4); /* Clear the variable that holds the output */ nir_store_var(b, s->output_vars[slot][comp], nir_ssa_undef(b, 1, 32), 0x1u); @@ -1625,7 +1625,7 @@ lower_ngg_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *intri prim_flag = nir_iadd_nuw(b, prim_flag, nir_ishl(b, odd, nir_imm_int(b, 1))); } - nir_build_store_shared(b, nir_u2u8(b, prim_flag), gs_emit_vtx_addr, .base = s->lds_offs_primflags + stream, .align_mul = 4u); + nir_store_shared(b, nir_u2u8(b, prim_flag), gs_emit_vtx_addr, .base = s->lds_offs_primflags + stream, .align_mul = 4u); nir_instr_remove(&intrin->instr); return true; } @@ -1724,7 +1724,7 @@ ngg_gs_export_primitives(nir_builder *b, nir_ssa_def *max_num_out_prims, nir_ssa } nir_ssa_def *arg = emit_pack_ngg_prim_exp_arg(b, s->num_vertices_per_primitive, vtx_indices, is_null_prim, false); - nir_build_export_primitive_amd(b, arg); + nir_export_primitive_amd(b, arg); nir_pop_if(b, if_prim_export_thread); } @@ -1740,7 +1740,7 @@ ngg_gs_export_vertices(nir_builder *b, nir_ssa_def *max_num_out_vtx, nir_ssa_def * The current thread will export a vertex that was live in another invocation. * Load the index of the vertex that the current thread will have to export. */ - nir_ssa_def *exported_vtx_idx = nir_build_load_shared(b, 1, 8, out_vtx_lds_addr, .base = s->lds_offs_primflags + 1); + nir_ssa_def *exported_vtx_idx = nir_load_shared(b, 1, 8, out_vtx_lds_addr, .base = s->lds_offs_primflags + 1); exported_out_vtx_lds_addr = ngg_gs_out_vertex_addr(b, nir_u2u32(b, exported_vtx_idx), s); } @@ -1756,12 +1756,12 @@ ngg_gs_export_vertices(nir_builder *b, nir_ssa_def *max_num_out_vtx, nir_ssa_def if (info->stream != 0 || info->bit_size == 0) continue; - nir_ssa_def *load = nir_build_load_shared(b, 1, info->bit_size, exported_out_vtx_lds_addr, .base = packed_location * 16u + comp * 4u, .align_mul = 4u); - nir_build_store_output(b, load, nir_imm_int(b, 0), .base = slot, .component = comp, .io_semantics = io_sem); + nir_ssa_def *load = nir_load_shared(b, 1, info->bit_size, exported_out_vtx_lds_addr, .base = packed_location * 16u + comp * 4u, .align_mul = 4u); + nir_store_output(b, load, nir_imm_int(b, 0), .base = slot, .component = comp, .io_semantics = io_sem); } } - nir_build_export_vertex_amd(b); + nir_export_vertex_amd(b); nir_pop_if(b, if_vtx_export_thread); } @@ -1779,7 +1779,7 @@ ngg_gs_setup_vertex_compaction(nir_builder *b, nir_ssa_def *vertex_live, nir_ssa nir_ssa_def *exporter_lds_addr = ngg_gs_out_vertex_addr(b, exporter_tid_in_tg, s); nir_ssa_def *tid_in_tg_u8 = nir_u2u8(b, tid_in_tg); - nir_build_store_shared(b, tid_in_tg_u8, exporter_lds_addr, .base = s->lds_offs_primflags + 1); + nir_store_shared(b, tid_in_tg_u8, exporter_lds_addr, .base = s->lds_offs_primflags + 1); } nir_pop_if(b, if_vertex_live); } @@ -1791,7 +1791,7 @@ ngg_gs_load_out_vtx_primflag_0(nir_builder *b, nir_ssa_def *tid_in_tg, nir_ssa_d nir_ssa_def *zero = nir_imm_int(b, 0); nir_if *if_outvtx_thread = nir_push_if(b, nir_ilt(b, tid_in_tg, max_num_out_vtx)); - nir_ssa_def *primflag_0 = nir_build_load_shared(b, 1, 8, vtx_lds_addr, .base = s->lds_offs_primflags, .align_mul = 4u); + nir_ssa_def *primflag_0 = nir_load_shared(b, 1, 8, vtx_lds_addr, .base = s->lds_offs_primflags, .align_mul = 4u); primflag_0 = nir_u2u32(b, primflag_0); nir_pop_if(b, if_outvtx_thread); @@ -1801,8 +1801,8 @@ ngg_gs_load_out_vtx_primflag_0(nir_builder *b, nir_ssa_def *tid_in_tg, nir_ssa_d static void ngg_gs_finale(nir_builder *b, lower_ngg_gs_state *s) { - nir_ssa_def *tid_in_tg = nir_build_load_local_invocation_index(b); - nir_ssa_def *max_vtxcnt = nir_build_load_workgroup_num_input_vertices_amd(b); + nir_ssa_def *tid_in_tg = nir_load_local_invocation_index(b); + nir_ssa_def *max_vtxcnt = nir_load_workgroup_num_input_vertices_amd(b); nir_ssa_def *max_prmcnt = max_vtxcnt; /* They are currently practically the same; both RADV and RadeonSI do this. */ nir_ssa_def *out_vtx_lds_addr = ngg_gs_out_vertex_addr(b, tid_in_tg, s); @@ -1810,8 +1810,8 @@ ngg_gs_finale(nir_builder *b, lower_ngg_gs_state *s) /* When the output is compile-time known, the GS writes all possible vertices and primitives it can. * The gs_alloc_req needs to happen on one wave only, otherwise the HW hangs. */ - nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_zero(b, 1, 32))); - nir_build_alloc_vertices_and_primitives_amd(b, max_vtxcnt, max_prmcnt); + nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_load_subgroup_id(b), nir_imm_zero(b, 1, 32))); + nir_alloc_vertices_and_primitives_amd(b, max_vtxcnt, max_prmcnt); nir_pop_if(b, if_wave_0); } @@ -1843,8 +1843,8 @@ ngg_gs_finale(nir_builder *b, lower_ngg_gs_state *s) max_prmcnt = nir_bcsel(b, any_output, max_prmcnt, nir_imm_int(b, 0)); /* Allocate export space. We currently don't compact primitives, just use the maximum number. */ - nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_zero(b, 1, 32))); - nir_build_alloc_vertices_and_primitives_amd(b, workgroup_num_vertices, max_prmcnt); + nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_load_subgroup_id(b), nir_imm_zero(b, 1, 32))); + nir_alloc_vertices_and_primitives_amd(b, workgroup_num_vertices, max_prmcnt); nir_pop_if(b, if_wave_0); /* Vertex compaction. This makes sure there are no gaps between threads that export vertices. */ @@ -1914,7 +1914,7 @@ ac_nir_lower_ngg_gs(nir_shader *shader, .memory_semantics=NIR_MEMORY_ACQ_REL, .memory_modes=nir_var_mem_shared); /* Wrap the GS control flow. */ - nir_if *if_gs_thread = nir_push_if(b, nir_build_has_input_primitive_amd(b)); + nir_if *if_gs_thread = nir_push_if(b, nir_has_input_primitive_amd(b)); /* Create and initialize output variables */ for (unsigned slot = 0; slot < VARYING_SLOT_MAX; ++slot) { @@ -1968,7 +1968,7 @@ lower_ms_store_output(nir_builder *b, assert(base == 0); nir_ssa_def *addr = nir_imm_int(b, 0); - nir_build_store_shared(b, nir_u2u32(b, store_val), addr, .base = s->numprims_lds_addr); + nir_store_shared(b, nir_u2u32(b, store_val), addr, .base = s->numprims_lds_addr); } else if (io_sem.location == VARYING_SLOT_PRIMITIVE_INDICES) { /* Contrary to the name, these are not primitive indices, but * vertex indices for each vertex of the output primitives. @@ -1976,8 +1976,8 @@ lower_ms_store_output(nir_builder *b, */ nir_ssa_def *offset_src = nir_get_io_offset_src(intrin)->ssa; - nir_build_store_shared(b, nir_u2u8(b, store_val), offset_src, - .base = s->prim_vtx_indices_addr + base); + nir_store_shared(b, nir_u2u8(b, store_val), offset_src, + .base = s->prim_vtx_indices_addr + base); } else { unreachable("Invalid mesh shader output"); } @@ -2002,10 +2002,10 @@ lower_ms_load_output(nir_builder *b, assert(base == 0); nir_ssa_def *addr = nir_imm_int(b, 0); - return nir_build_load_shared(b, 1, 32, addr, .base = s->numprims_lds_addr); + return nir_load_shared(b, 1, 32, addr, .base = s->numprims_lds_addr); } else if (io_sem.location == VARYING_SLOT_PRIMITIVE_INDICES) { nir_ssa_def *offset_src = nir_get_io_offset_src(intrin)->ssa; - nir_ssa_def *index = nir_build_load_shared(b, 1, 8, offset_src, + nir_ssa_def *index = nir_load_shared(b, 1, 8, offset_src, .base = s->prim_vtx_indices_addr + base); return nir_u2u(b, index, intrin->dest.ssa.bit_size); } @@ -2117,9 +2117,9 @@ ms_store_arrayed_output_intrin(nir_builder *b, unsigned const_off = base_shared_addr + component_offset * 4; - nir_build_store_shared(b, store_val, addr, .base = const_off, - .write_mask = write_mask, .align_mul = 16, - .align_offset = const_off % 16); + nir_store_shared(b, store_val, addr, .base = const_off, + .write_mask = write_mask, .align_mul = 16, + .align_offset = const_off % 16); } static nir_ssa_def * @@ -2139,9 +2139,9 @@ ms_load_arrayed_output(nir_builder *b, nir_ssa_def *base_addr_off = nir_imul_imm(b, base_offset, 16); nir_ssa_def *addr = nir_iadd_nuw(b, base_addr, base_addr_off); - return nir_build_load_shared(b, num_components, load_bit_size, addr, .align_mul = 16, - .align_offset = component_addr_off % 16, - .base = base_shared_addr + component_addr_off); + return nir_load_shared(b, num_components, load_bit_size, addr, .align_mul = 16, + .align_offset = component_addr_off % 16, + .base = base_shared_addr + component_addr_off); } static nir_ssa_def * @@ -2304,8 +2304,8 @@ ms_emit_arrayed_outputs(nir_builder *b, ms_load_arrayed_output(b, invocation_index, zero, driver_location, start_comp, num_components, 32, num_arrayed_outputs, lds_base_addr); - nir_build_store_output(b, load, nir_imm_int(b, 0), .base = slot, .component = start_comp, - .io_semantics = io_sem); + nir_store_output(b, load, nir_imm_int(b, 0), .base = slot, .component = start_comp, + .io_semantics = io_sem); } } } @@ -2342,27 +2342,27 @@ emit_ms_finale(nir_shader *shader, lower_ngg_ms_state *s) nir_ssa_def *loaded_num_prm; nir_ssa_def *zero = nir_imm_int(b, 0); nir_ssa_def *dont_care = nir_ssa_undef(b, 1, 32); - nir_if *if_elected = nir_push_if(b, nir_build_elect(b, 1)); + nir_if *if_elected = nir_push_if(b, nir_elect(b, 1)); { - loaded_num_prm = nir_build_load_shared(b, 1, 32, zero, .base = s->numprims_lds_addr); + loaded_num_prm = nir_load_shared(b, 1, 32, zero, .base = s->numprims_lds_addr); } nir_pop_if(b, if_elected); loaded_num_prm = nir_if_phi(b, loaded_num_prm, dont_care); - nir_ssa_def *num_prm = nir_build_read_first_invocation(b, loaded_num_prm); + nir_ssa_def *num_prm = nir_read_first_invocation(b, loaded_num_prm); nir_ssa_def *num_vtx = nir_imm_int(b, shader->info.mesh.max_vertices_out); /* If the shader doesn't actually create any primitives, don't allocate any output. */ num_vtx = nir_bcsel(b, nir_ieq_imm(b, num_prm, 0), nir_imm_int(b, 0), num_vtx); /* Emit GS_ALLOC_REQ on Wave 0 to let the HW know the output size. */ - nir_ssa_def *wave_id = nir_build_load_subgroup_id(b); + nir_ssa_def *wave_id = nir_load_subgroup_id(b); nir_if *if_wave_0 = nir_push_if(b, nir_ieq_imm(b, wave_id, 0)); { - nir_build_alloc_vertices_and_primitives_amd(b, num_vtx, num_prm); + nir_alloc_vertices_and_primitives_amd(b, num_vtx, num_prm); } nir_pop_if(b, if_wave_0); - nir_ssa_def *invocation_index = nir_build_load_local_invocation_index(b); + nir_ssa_def *invocation_index = nir_load_local_invocation_index(b); /* Load vertex/primitive attributes from shared memory and * emit store_output intrinsics for them. @@ -2379,7 +2379,7 @@ emit_ms_finale(nir_shader *shader, lower_ngg_ms_state *s) /* All per-vertex attributes. */ ms_emit_arrayed_outputs(b, invocation_index, s->per_vertex_outputs, s->num_per_vertex_outputs, s->vertex_attr_lds_addr, s); - nir_build_export_vertex_amd(b); + nir_export_vertex_amd(b); } nir_pop_if(b, if_has_output_vertex); @@ -2393,14 +2393,14 @@ emit_ms_finale(nir_shader *shader, lower_ngg_ms_state *s) /* Primitive connectivity data: describes which vertices the primitive uses. */ nir_ssa_def *prim_idx_addr = nir_imul_imm(b, invocation_index, s->vertices_per_prim); - nir_ssa_def *indices_loaded = nir_build_load_shared(b, s->vertices_per_prim, 8, prim_idx_addr, .base = s->prim_vtx_indices_addr); + nir_ssa_def *indices_loaded = nir_load_shared(b, s->vertices_per_prim, 8, prim_idx_addr, .base = s->prim_vtx_indices_addr); nir_ssa_def *indices[3]; indices[0] = nir_u2u32(b, nir_channel(b, indices_loaded, 0)); indices[1] = s->vertices_per_prim > 1 ? nir_u2u32(b, nir_channel(b, indices_loaded, 1)) : NULL; indices[2] = s->vertices_per_prim > 2 ? nir_u2u32(b, nir_channel(b, indices_loaded, 2)) : NULL; nir_ssa_def *prim_exp_arg = emit_pack_ngg_prim_exp_arg(b, s->vertices_per_prim, indices, NULL, false); - nir_build_export_primitive_amd(b, prim_exp_arg); + nir_export_primitive_amd(b, prim_exp_arg); } nir_pop_if(b, if_has_output_primitive); } @@ -2472,7 +2472,7 @@ ac_nir_lower_ngg_ms(nir_shader *shader, unsigned num_ms_invocations = b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * b->shader->info.workgroup_size[2]; - nir_ssa_def *invocation_index = nir_build_load_local_invocation_index(b); + nir_ssa_def *invocation_index = nir_load_local_invocation_index(b); nir_ssa_def *has_ms_invocation = nir_ult(b, invocation_index, nir_imm_int(b, num_ms_invocations)); nir_if *if_has_ms_invocation = nir_push_if(b, has_ms_invocation); nir_cf_reinsert(&extracted, b->cursor); diff --git a/src/amd/common/ac_nir_lower_tess_io_to_mem.c b/src/amd/common/ac_nir_lower_tess_io_to_mem.c index ce8eba7..dd2951d 100644 --- a/src/amd/common/ac_nir_lower_tess_io_to_mem.c +++ b/src/amd/common/ac_nir_lower_tess_io_to_mem.c @@ -215,15 +215,15 @@ lower_ls_output_store(nir_builder *b, b->cursor = nir_before_instr(instr); - nir_ssa_def *vertex_idx = nir_build_load_local_invocation_index(b); + nir_ssa_def *vertex_idx = nir_load_local_invocation_index(b); nir_ssa_def *base_off_var = nir_imul_imm(b, vertex_idx, st->tcs_num_reserved_inputs * 16u); nir_ssa_def *io_off = nir_build_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u); unsigned write_mask = nir_intrinsic_write_mask(intrin); nir_ssa_def *off = nir_iadd_nuw(b, base_off_var, io_off); - nir_build_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask, - .align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u); + nir_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask, + .align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u); /* NOTE: don't remove the store_output intrinsic on GFX9+ when tcs_in_out_eq, * it will be used by same-invocation TCS input loads. @@ -269,8 +269,8 @@ hs_per_vertex_input_lds_offset(nir_builder *b, nir_intrinsic_instr *instr) { unsigned tcs_in_vertex_stride = st->tcs_num_reserved_inputs * 16u; - nir_ssa_def *tcs_in_vtxcnt = nir_build_load_patch_vertices_in(b); - nir_ssa_def *rel_patch_id = nir_build_load_tess_rel_patch_id_amd(b); + nir_ssa_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b); + nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b); nir_ssa_def *tcs_in_patch_stride = nir_imul_imm(b, tcs_in_vtxcnt, tcs_in_vertex_stride); nir_ssa_def *tcs_in_current_patch_offset = nir_imul(b, rel_patch_id, tcs_in_patch_stride); @@ -296,8 +296,8 @@ hs_output_lds_offset(nir_builder *b, unsigned pervertex_output_patch_size = b->shader->info.tess.tcs_vertices_out * output_vertex_size; unsigned output_patch_stride = pervertex_output_patch_size + st->tcs_num_reserved_patch_outputs * 16u; - nir_ssa_def *tcs_in_vtxcnt = nir_build_load_patch_vertices_in(b); - nir_ssa_def *tcs_num_patches = nir_build_load_tcs_num_patches_amd(b); + nir_ssa_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b); + nir_ssa_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b); nir_ssa_def *input_patch_size = nir_imul_imm(b, tcs_in_vtxcnt, st->tcs_num_reserved_inputs * 16u); nir_ssa_def *output_patch0_offset = nir_imul(b, input_patch_size, tcs_num_patches); @@ -305,7 +305,7 @@ hs_output_lds_offset(nir_builder *b, ? nir_build_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u) : nir_imm_int(b, 0); - nir_ssa_def *rel_patch_id = nir_build_load_tess_rel_patch_id_amd(b); + nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b); nir_ssa_def *patch_offset = nir_imul_imm(b, rel_patch_id, output_patch_stride); nir_ssa_def *output_patch_offset = nir_iadd_nuw(b, patch_offset, output_patch0_offset); @@ -328,13 +328,13 @@ hs_per_vertex_output_vmem_offset(nir_builder *b, { nir_ssa_def *out_vertices_per_patch = b->shader->info.stage == MESA_SHADER_TESS_CTRL ? nir_imm_int(b, b->shader->info.tess.tcs_vertices_out) - : nir_build_load_patch_vertices_in(b); + : nir_load_patch_vertices_in(b); - nir_ssa_def *tcs_num_patches = nir_build_load_tcs_num_patches_amd(b); + nir_ssa_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b); nir_ssa_def *attr_stride = nir_imul(b, tcs_num_patches, nir_imul_imm(b, out_vertices_per_patch, 16u)); nir_ssa_def *io_offset = nir_build_calc_io_offset(b, intrin, attr_stride, 4u); - nir_ssa_def *rel_patch_id = nir_build_load_tess_rel_patch_id_amd(b); + nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b); nir_ssa_def *patch_offset = nir_imul(b, rel_patch_id, nir_imul_imm(b, out_vertices_per_patch, 16u)); nir_ssa_def *vertex_index = nir_ssa_for_src(b, *nir_get_io_arrayed_index_src(intrin), 1); @@ -351,9 +351,9 @@ hs_per_patch_output_vmem_offset(nir_builder *b, { nir_ssa_def *out_vertices_per_patch = b->shader->info.stage == MESA_SHADER_TESS_CTRL ? nir_imm_int(b, b->shader->info.tess.tcs_vertices_out) - : nir_build_load_patch_vertices_in(b); + : nir_load_patch_vertices_in(b); - nir_ssa_def *tcs_num_patches = nir_build_load_tcs_num_patches_amd(b); + nir_ssa_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b); nir_ssa_def *per_vertex_output_patch_size = nir_imul_imm(b, out_vertices_per_patch, st->tcs_num_reserved_outputs * 16u); nir_ssa_def *per_patch_data_offset = nir_imul(b, tcs_num_patches, per_vertex_output_patch_size); @@ -364,7 +364,7 @@ hs_per_patch_output_vmem_offset(nir_builder *b, if (const_base_offset) off = nir_iadd_nuw(b, off, nir_imul_imm(b, tcs_num_patches, const_base_offset)); - nir_ssa_def *rel_patch_id = nir_build_load_tess_rel_patch_id_amd(b); + nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b); nir_ssa_def *patch_offset = nir_imul_imm(b, rel_patch_id, 16u); off = nir_iadd_nuw(b, off, per_patch_data_offset); return nir_iadd_nuw(b, off, patch_offset); @@ -379,8 +379,8 @@ lower_hs_per_vertex_input_load(nir_builder *b, nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); nir_ssa_def *off = hs_per_vertex_input_lds_offset(b, st, intrin); - return nir_build_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off, - .align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u); + return nir_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off, + .align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u); } static void @@ -404,9 +404,9 @@ lower_hs_output_store(nir_builder *b, ? hs_per_vertex_output_vmem_offset(b, st, intrin) : hs_per_patch_output_vmem_offset(b, st, intrin, 0); - nir_ssa_def *hs_ring_tess_offchip = nir_build_load_ring_tess_offchip_amd(b); - nir_ssa_def *offchip_offset = nir_build_load_ring_tess_offchip_offset_amd(b); - nir_build_store_buffer_amd(b, store_val, hs_ring_tess_offchip, vmem_off, offchip_offset, .write_mask = write_mask, .memory_modes = nir_var_shader_out); + nir_ssa_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b); + nir_ssa_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b); + nir_store_buffer_amd(b, store_val, hs_ring_tess_offchip, vmem_off, offchip_offset, .write_mask = write_mask, .memory_modes = nir_var_shader_out); } if (write_to_lds) { @@ -417,8 +417,8 @@ lower_hs_output_store(nir_builder *b, st->tcs_tess_lvl_out_loc = nir_intrinsic_base(intrin) * 16u; nir_ssa_def *lds_off = hs_output_lds_offset(b, st, intrin); - nir_build_store_shared(b, store_val, lds_off, .write_mask = write_mask, - .align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u); + nir_store_shared(b, store_val, lds_off, .write_mask = write_mask, + .align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u); } } @@ -428,8 +428,8 @@ lower_hs_output_load(nir_builder *b, lower_tess_io_state *st) { nir_ssa_def *off = hs_output_lds_offset(b, st, intrin); - return nir_build_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off, - .align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u); + return nir_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off, + .align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u); } static void @@ -507,27 +507,27 @@ hs_emit_write_tess_factors(nir_shader *shader, nir_scoped_barrier(b, .execution_scope=NIR_SCOPE_WORKGROUP, .memory_scope=NIR_SCOPE_WORKGROUP, .memory_semantics=NIR_MEMORY_ACQ_REL, .memory_modes=nir_var_shader_out|nir_var_mem_shared); - nir_ssa_def *invocation_id = nir_build_load_invocation_id(b); + nir_ssa_def *invocation_id = nir_load_invocation_id(b); /* Only the 1st invocation of each patch needs to do this. */ nir_if *invocation_id_zero = nir_push_if(b, nir_ieq_imm(b, invocation_id, 0)); /* The descriptor where tess factors have to be stored by the shader. */ - nir_ssa_def *tessfactor_ring = nir_build_load_ring_tess_factors_amd(b); + nir_ssa_def *tessfactor_ring = nir_load_ring_tess_factors_amd(b); /* Base LDS address of per-patch outputs in the current patch. */ nir_ssa_def *lds_base = hs_output_lds_offset(b, st, NULL); /* Load all tessellation factors (aka. tess levels) from LDS. */ - nir_ssa_def *tessfactors_outer = nir_build_load_shared(b, outer_comps, 32, lds_base, .base = st->tcs_tess_lvl_out_loc, - .align_mul = 16u, .align_offset = st->tcs_tess_lvl_out_loc % 16u); + nir_ssa_def *tessfactors_outer = nir_load_shared(b, outer_comps, 32, lds_base, .base = st->tcs_tess_lvl_out_loc, + .align_mul = 16u, .align_offset = st->tcs_tess_lvl_out_loc % 16u); nir_ssa_def *tessfactors_inner = inner_comps - ? nir_build_load_shared(b, inner_comps, 32, lds_base, .base = st->tcs_tess_lvl_in_loc, - .align_mul = 16u, .align_offset = st->tcs_tess_lvl_in_loc % 16u) + ? nir_load_shared(b, inner_comps, 32, lds_base, .base = st->tcs_tess_lvl_in_loc, + .align_mul = 16u, .align_offset = st->tcs_tess_lvl_in_loc % 16u) : NULL; - nir_ssa_def *rel_patch_id = nir_build_load_tess_rel_patch_id_amd(b); - nir_ssa_def *tess_factors_base = nir_build_load_ring_tess_factors_offset_amd(b); + nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b); + nir_ssa_def *tess_factors_base = nir_load_ring_tess_factors_offset_amd(b); nir_ssa_def *tess_factors_offset = nir_imul_imm(b, rel_patch_id, (inner_comps + outer_comps) * 4u); unsigned tess_factors_const_offset = 0; @@ -535,7 +535,7 @@ hs_emit_write_tess_factors(nir_shader *shader, /* Store the dynamic HS control word. */ nir_if *rel_patch_id_zero = nir_push_if(b, nir_ieq_imm(b, rel_patch_id, 0)); nir_ssa_def *ctrlw = nir_imm_int(b, 0x80000000u); - nir_build_store_buffer_amd(b, ctrlw, tessfactor_ring, nir_imm_zero(b, 1, 32), tess_factors_base); + nir_store_buffer_amd(b, ctrlw, tessfactor_ring, nir_imm_zero(b, 1, 32), tess_factors_base); tess_factors_const_offset += 4; nir_pop_if(b, rel_patch_id_zero); } @@ -544,27 +544,27 @@ hs_emit_write_tess_factors(nir_shader *shader, if (shader->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) { /* LINES reversal */ nir_ssa_def *t = nir_vec2(b, nir_channel(b, tessfactors_outer, 1), nir_channel(b, tessfactors_outer, 0)); - nir_build_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset); + nir_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset); } else if (shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES) { nir_ssa_def *t = nir_vec4(b, nir_channel(b, tessfactors_outer, 0), nir_channel(b, tessfactors_outer, 1), nir_channel(b, tessfactors_outer, 2), nir_channel(b, tessfactors_inner, 0)); - nir_build_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset); + nir_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset); } else { - nir_build_store_buffer_amd(b, tessfactors_outer, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset); - nir_build_store_buffer_amd(b, tessfactors_inner, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset + 4u * outer_comps); + nir_store_buffer_amd(b, tessfactors_outer, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset); + nir_store_buffer_amd(b, tessfactors_inner, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset + 4u * outer_comps); } if (st->tes_reads_tessfactors) { /* Store to offchip for TES to read - only if TES actually reads them */ - nir_ssa_def *hs_ring_tess_offchip = nir_build_load_ring_tess_offchip_amd(b); - nir_ssa_def *offchip_offset = nir_build_load_ring_tess_offchip_offset_amd(b); + nir_ssa_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b); + nir_ssa_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b); nir_ssa_def *vmem_off_outer = hs_per_patch_output_vmem_offset(b, st, NULL, st->tcs_tess_lvl_out_loc); - nir_build_store_buffer_amd(b, tessfactors_outer, hs_ring_tess_offchip, vmem_off_outer, offchip_offset, .memory_modes = nir_var_shader_out); + nir_store_buffer_amd(b, tessfactors_outer, hs_ring_tess_offchip, vmem_off_outer, offchip_offset, .memory_modes = nir_var_shader_out); if (inner_comps) { nir_ssa_def *vmem_off_inner = hs_per_patch_output_vmem_offset(b, st, NULL, st->tcs_tess_lvl_in_loc); - nir_build_store_buffer_amd(b, tessfactors_inner, hs_ring_tess_offchip, vmem_off_inner, offchip_offset, .memory_modes = nir_var_shader_out); + nir_store_buffer_amd(b, tessfactors_inner, hs_ring_tess_offchip, vmem_off_inner, offchip_offset, .memory_modes = nir_var_shader_out); } } @@ -581,13 +581,13 @@ lower_tes_input_load(nir_builder *b, lower_tess_io_state *st = (lower_tess_io_state *) state; nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - nir_ssa_def *offchip_ring = nir_build_load_ring_tess_offchip_amd(b); - nir_ssa_def *offchip_offset = nir_build_load_ring_tess_offchip_offset_amd(b); + nir_ssa_def *offchip_ring = nir_load_ring_tess_offchip_amd(b); + nir_ssa_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b); nir_ssa_def *off = intrin->intrinsic == nir_intrinsic_load_per_vertex_input ? hs_per_vertex_output_vmem_offset(b, st, intrin) : hs_per_patch_output_vmem_offset(b, st, intrin, 0); - return nir_build_load_buffer_amd(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, offchip_ring, off, offchip_offset); + return nir_load_buffer_amd(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, offchip_ring, off, offchip_offset); } static bool