ac/nir: avoid providing a write_mask to intrinsic builders
author: Rhys Perry <pendingchaos02@gmail.com>
Fri, 7 Jan 2022 16:05:27 +0000 (16:05 +0000)
committer: Marge Bot <emma+marge@anholt.net>
Fri, 21 Jan 2022 13:45:33 +0000 (13:45 +0000)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14455>

src/amd/common/ac_nir_lower_esgs_io_to_mem.c
src/amd/common/ac_nir_lower_ngg.c
src/amd/common/ac_nir_lower_tess_io_to_mem.c

index 0e8fe59..5fff4de 100644 (file)
@@ -100,7 +100,7 @@ emit_split_buffer_store(nir_builder *b, nir_ssa_def *d, nir_ssa_def *desc, nir_s
 
          nir_ssa_def *store_val = nir_extract_bits(b, &d, 1, start_byte * 8u, 1, store_bytes * 8u);
          nir_build_store_buffer_amd(b, store_val, desc, v_off, s_off, .is_swizzled = swizzled, .slc_amd = slc,
-                                    .base = start_byte, .write_mask = 1u, .memory_modes = nir_var_shader_out);
+                                    .base = start_byte, .memory_modes = nir_var_shader_out);
 
          start_byte += store_bytes;
          bytes -= store_bytes;
index 61b8a2d..203234f 100644 (file)
@@ -267,7 +267,7 @@ repack_invocations_in_workgroup(nir_builder *b, nir_ssa_def *input_bool,
    nir_ssa_def *dont_care = nir_ssa_undef(b, 1, num_lds_dwords * 32);
    nir_if *if_first_lane = nir_push_if(b, nir_build_elect(b, 1));
 
-   nir_build_store_shared(b, nir_u2u8(b, surviving_invocations_in_current_wave), wave_id, .base = lds_addr_base, .align_mul = 1u, .write_mask = 0x1u);
+   nir_build_store_shared(b, nir_u2u8(b, surviving_invocations_in_current_wave), wave_id, .base = lds_addr_base, .align_mul = 1u);
 
    nir_scoped_barrier(b, .execution_scope=NIR_SCOPE_WORKGROUP, .memory_scope=NIR_SCOPE_WORKGROUP,
                          .memory_semantics=NIR_MEMORY_ACQ_REL, .memory_modes=nir_var_mem_shared);
@@ -384,7 +384,7 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def
          nir_ssa_def *provoking_vtx_idx = ngg_input_primitive_vertex_index(b, st->provoking_vtx_idx);
          nir_ssa_def *addr = pervertex_lds_addr(b, provoking_vtx_idx, 4u);
 
-         nir_build_store_shared(b,  prim_id, addr, .write_mask = 1u, .align_mul = 4u);
+         nir_build_store_shared(b,  prim_id, addr, .align_mul = 4u);
       }
 
       nir_build_export_primitive_amd(b, arg);
@@ -420,7 +420,7 @@ emit_store_ngg_nogs_es_primitive_id(nir_builder *b)
 
    nir_build_store_output(b, prim_id, nir_imm_zero(b, 1, 32),
                           .base = io_sem.location,
-                          .write_mask = 1u, .src_type = nir_type_uint32, .io_semantics = io_sem);
+                          .src_type = nir_type_uint32, .io_semantics = io_sem);
 }
 
 static bool
@@ -715,16 +715,16 @@ compact_vertices_after_culling(nir_builder *b,
       nir_ssa_def *exporter_addr = pervertex_lds_addr(b, es_exporter_tid, pervertex_lds_bytes);
 
       /* Store the exporter thread's index to the LDS space of the current thread so GS threads can load it */
-      nir_build_store_shared(b, nir_u2u8(b, es_exporter_tid), es_vertex_lds_addr, .base = lds_es_exporter_tid, .align_mul = 1u, .write_mask = 0x1u);
+      nir_build_store_shared(b, nir_u2u8(b, es_exporter_tid), es_vertex_lds_addr, .base = lds_es_exporter_tid, .align_mul = 1u);
 
       /* Store the current thread's position output to the exporter thread's LDS space */
       nir_ssa_def *pos = nir_load_var(b, position_value_var);
-      nir_build_store_shared(b, pos, exporter_addr, .base = lds_es_pos_x, .align_mul = 4u, .write_mask = 0xfu);
+      nir_build_store_shared(b, pos, exporter_addr, .base = lds_es_pos_x, .align_mul = 4u);
 
       /* Store the current thread's repackable arguments to the exporter thread's LDS space */
       for (unsigned i = 0; i < max_exported_args; ++i) {
          nir_ssa_def *arg_val = nir_load_var(b, repacked_arg_vars[i]);
-         nir_intrinsic_instr *store = nir_build_store_shared(b, arg_val, exporter_addr, .base = lds_es_arg_0 + 4u * i, .align_mul = 4u, .write_mask = 0x1u);
+         nir_intrinsic_instr *store = nir_build_store_shared(b, arg_val, exporter_addr, .base = lds_es_arg_0 + 4u * i, .align_mul = 4u);
 
          nogs_state->compact_arg_stores[i] = &store->instr;
       }
@@ -1139,13 +1139,13 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
          /* Store position components that are relevant to culling in LDS */
          nir_ssa_def *pre_cull_pos = nir_load_var(b, position_value_var);
          nir_ssa_def *pre_cull_w = nir_channel(b, pre_cull_pos, 3);
-         nir_build_store_shared(b, pre_cull_w, es_vertex_lds_addr, .write_mask = 0x1u, .align_mul = 4, .base = lds_es_pos_w);
+         nir_build_store_shared(b, pre_cull_w, es_vertex_lds_addr, .align_mul = 4, .base = lds_es_pos_w);
          nir_ssa_def *pre_cull_x_div_w = nir_fdiv(b, nir_channel(b, pre_cull_pos, 0), pre_cull_w);
          nir_ssa_def *pre_cull_y_div_w = nir_fdiv(b, nir_channel(b, pre_cull_pos, 1), pre_cull_w);
-         nir_build_store_shared(b, nir_vec2(b, pre_cull_x_div_w, pre_cull_y_div_w), es_vertex_lds_addr, .write_mask = 0x3u, .align_mul = 4, .base = lds_es_pos_x);
+         nir_build_store_shared(b, nir_vec2(b, pre_cull_x_div_w, pre_cull_y_div_w), es_vertex_lds_addr, .align_mul = 4, .base = lds_es_pos_x);
 
          /* Clear out the ES accepted flag in LDS */
-         nir_build_store_shared(b, nir_imm_zero(b, 1, 8), es_vertex_lds_addr, .write_mask = 0x1u, .align_mul = 4, .base = lds_es_vertex_accepted);
+         nir_build_store_shared(b, nir_imm_zero(b, 1, 8), es_vertex_lds_addr, .align_mul = 4, .base = lds_es_vertex_accepted);
       }
       nir_pop_if(b, if_es_thread);
 
@@ -1188,7 +1188,7 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
          {
             /* Store the accepted state to LDS for ES threads */
             for (unsigned vtx = 0; vtx < 3; ++vtx)
-               nir_build_store_shared(b, nir_imm_intN_t(b, 0xff, 8), vtx_addr[vtx], .base = lds_es_vertex_accepted, .align_mul = 4u, .write_mask = 0x1u);
+               nir_build_store_shared(b, nir_imm_intN_t(b, 0xff, 8), vtx_addr[vtx], .base = lds_es_vertex_accepted, .align_mul = 4u);
          }
          nir_pop_if(b, if_gs_accepted);
       }
@@ -1405,7 +1405,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
 
       nir_ssa_def *pos_val = nir_load_var(b, state.position_value_var);
       nir_io_semantics io_sem = { .location = VARYING_SLOT_POS, .num_slots = 1 };
-      nir_build_store_output(b, pos_val, nir_imm_int(b, 0), .base = VARYING_SLOT_POS, .component = 0, .io_semantics = io_sem, .write_mask = 0xfu);
+      nir_build_store_output(b, pos_val, nir_imm_int(b, 0), .base = VARYING_SLOT_POS, .component = 0, .io_semantics = io_sem);
    }
 
    nir_metadata_preserve(impl, nir_metadata_none);
@@ -1480,7 +1480,7 @@ ngg_gs_clear_primflags(nir_builder *b, nir_ssa_def *num_vertices, unsigned strea
       nir_push_else(b, if_break);
       {
          nir_ssa_def *emit_vtx_addr = ngg_gs_emit_vertex_addr(b, current_clear_primflag_idx, s);
-         nir_build_store_shared(b, zero_u8, emit_vtx_addr, .base = s->lds_offs_primflags + stream, .align_mul = 1, .write_mask = 0x1u);
+         nir_build_store_shared(b, zero_u8, emit_vtx_addr, .base = s->lds_offs_primflags + stream, .align_mul = 1);
          nir_store_var(b, s->current_clear_primflag_idx_var, nir_iadd_imm_nuw(b, current_clear_primflag_idx, 1), 0x1u);
       }
       nir_pop_if(b, if_break);
@@ -1604,7 +1604,7 @@ lower_ngg_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *intri
          if (info->bit_size != 32)
             out_val = nir_u2u(b, out_val, info->bit_size);
 
-         nir_build_store_shared(b, out_val, gs_emit_vtx_addr, .base = packed_location * 16 + comp * 4, .align_mul = 4, .write_mask = 0x1u);
+         nir_build_store_shared(b, out_val, gs_emit_vtx_addr, .base = packed_location * 16 + comp * 4, .align_mul = 4);
 
          /* Clear the variable that holds the output */
          nir_store_var(b, s->output_vars[slot][comp], nir_ssa_undef(b, 1, 32), 0x1u);
@@ -1625,7 +1625,7 @@ lower_ngg_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *intri
       prim_flag = nir_iadd_nuw(b, prim_flag, nir_ishl(b, odd, nir_imm_int(b, 1)));
    }
 
-   nir_build_store_shared(b, nir_u2u8(b, prim_flag), gs_emit_vtx_addr, .base = s->lds_offs_primflags + stream, .align_mul = 4u, .write_mask = 0x1u);
+   nir_build_store_shared(b, nir_u2u8(b, prim_flag), gs_emit_vtx_addr, .base = s->lds_offs_primflags + stream, .align_mul = 4u);
    nir_instr_remove(&intrin->instr);
    return true;
 }
@@ -1757,7 +1757,7 @@ ngg_gs_export_vertices(nir_builder *b, nir_ssa_def *max_num_out_vtx, nir_ssa_def
             continue;
 
          nir_ssa_def *load = nir_build_load_shared(b, 1, info->bit_size, exported_out_vtx_lds_addr, .base = packed_location * 16u + comp * 4u, .align_mul = 4u);
-         nir_build_store_output(b, load, nir_imm_int(b, 0), .write_mask = 0x1u, .base = slot, .component = comp, .io_semantics = io_sem);
+         nir_build_store_output(b, load, nir_imm_int(b, 0), .base = slot, .component = comp, .io_semantics = io_sem);
       }
    }
 
@@ -1779,7 +1779,7 @@ ngg_gs_setup_vertex_compaction(nir_builder *b, nir_ssa_def *vertex_live, nir_ssa
 
       nir_ssa_def *exporter_lds_addr = ngg_gs_out_vertex_addr(b, exporter_tid_in_tg, s);
       nir_ssa_def *tid_in_tg_u8 = nir_u2u8(b, tid_in_tg);
-      nir_build_store_shared(b, tid_in_tg_u8, exporter_lds_addr, .base = s->lds_offs_primflags + 1, .align_mul = 1u, .write_mask = 0x1u);
+      nir_build_store_shared(b, tid_in_tg_u8, exporter_lds_addr, .base = s->lds_offs_primflags + 1, .align_mul = 1u);
    }
    nir_pop_if(b, if_vertex_live);
 }
@@ -1968,8 +1968,7 @@ lower_ms_store_output(nir_builder *b,
       assert(base == 0);
 
       nir_ssa_def *addr = nir_imm_int(b, 0);
-      nir_build_store_shared(b, nir_u2u32(b, store_val), addr,
-                             .write_mask = 0x1u, .base = s->numprims_lds_addr,
+      nir_build_store_shared(b, nir_u2u32(b, store_val), addr, .base = s->numprims_lds_addr,
                              .align_mul = 4u);
    } else if (io_sem.location == VARYING_SLOT_PRIMITIVE_INDICES) {
       /* Contrary to the name, these are not primitive indices, but
@@ -1979,7 +1978,7 @@ lower_ms_store_output(nir_builder *b,
 
       nir_ssa_def *offset_src = nir_get_io_offset_src(intrin)->ssa;
       nir_build_store_shared(b, nir_u2u8(b, store_val), offset_src,
-                             .write_mask = 0x1u, .base = s->prim_vtx_indices_addr + base,
+                             .base = s->prim_vtx_indices_addr + base,
                              .align_mul = 1u);
    } else {
       unreachable("Invalid mesh shader output");
@@ -2307,8 +2306,8 @@ ms_emit_arrayed_outputs(nir_builder *b,
             ms_load_arrayed_output(b, invocation_index, zero, driver_location, start_comp,
                                    num_components, 32, num_arrayed_outputs, lds_base_addr);
 
-         nir_build_store_output(b, load, nir_imm_int(b, 0), .write_mask = BITFIELD_MASK(num_components),
-                                .base = slot, .component = start_comp, .io_semantics = io_sem);
+         nir_build_store_output(b, load, nir_imm_int(b, 0), .base = slot, .component = start_comp,
+                                .io_semantics = io_sem);
       }
    }
 }
index 1b4a8f0..ce8eba7 100644 (file)
@@ -535,7 +535,7 @@ hs_emit_write_tess_factors(nir_shader *shader,
       /* Store the dynamic HS control word. */
       nir_if *rel_patch_id_zero = nir_push_if(b, nir_ieq_imm(b, rel_patch_id, 0));
       nir_ssa_def *ctrlw = nir_imm_int(b, 0x80000000u);
-      nir_build_store_buffer_amd(b, ctrlw, tessfactor_ring, nir_imm_zero(b, 1, 32), tess_factors_base, .write_mask = 0x1);
+      nir_build_store_buffer_amd(b, ctrlw, tessfactor_ring, nir_imm_zero(b, 1, 32), tess_factors_base);
       tess_factors_const_offset += 4;
       nir_pop_if(b, rel_patch_id_zero);
    }
@@ -544,14 +544,14 @@ hs_emit_write_tess_factors(nir_shader *shader,
    if (shader->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
       /* LINES reversal */
       nir_ssa_def *t = nir_vec2(b, nir_channel(b, tessfactors_outer, 1), nir_channel(b, tessfactors_outer, 0));
-      nir_build_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset, .write_mask = 0x3);
+      nir_build_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset);
    } else if (shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES) {
       nir_ssa_def *t = nir_vec4(b, nir_channel(b, tessfactors_outer, 0), nir_channel(b, tessfactors_outer, 1),
                                 nir_channel(b, tessfactors_outer, 2), nir_channel(b, tessfactors_inner, 0));
-      nir_build_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset, .write_mask = 0xf);
+      nir_build_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset);
    } else {
-      nir_build_store_buffer_amd(b, tessfactors_outer, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset, .write_mask = BITFIELD_MASK(outer_comps));
-      nir_build_store_buffer_amd(b, tessfactors_inner, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset + 4u * outer_comps, .write_mask = BITFIELD_MASK(inner_comps));
+      nir_build_store_buffer_amd(b, tessfactors_outer, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset);
+      nir_build_store_buffer_amd(b, tessfactors_inner, tessfactor_ring, tess_factors_offset, tess_factors_base, .base = tess_factors_const_offset + 4u * outer_comps);
    }
 
    if (st->tes_reads_tessfactors) {
@@ -560,11 +560,11 @@ hs_emit_write_tess_factors(nir_shader *shader,
       nir_ssa_def *offchip_offset = nir_build_load_ring_tess_offchip_offset_amd(b);
 
       nir_ssa_def *vmem_off_outer = hs_per_patch_output_vmem_offset(b, st, NULL, st->tcs_tess_lvl_out_loc);
-      nir_build_store_buffer_amd(b, tessfactors_outer, hs_ring_tess_offchip, vmem_off_outer, offchip_offset, .write_mask = BITFIELD_MASK(outer_comps), .memory_modes = nir_var_shader_out);
+      nir_build_store_buffer_amd(b, tessfactors_outer, hs_ring_tess_offchip, vmem_off_outer, offchip_offset, .memory_modes = nir_var_shader_out);
 
       if (inner_comps) {
          nir_ssa_def *vmem_off_inner = hs_per_patch_output_vmem_offset(b, st, NULL, st->tcs_tess_lvl_in_loc);
-         nir_build_store_buffer_amd(b, tessfactors_inner, hs_ring_tess_offchip, vmem_off_inner, offchip_offset, .write_mask = BITFIELD_MASK(inner_comps), .memory_modes = nir_var_shader_out);
+         nir_build_store_buffer_amd(b, tessfactors_inner, hs_ring_tess_offchip, vmem_off_inner, offchip_offset, .memory_modes = nir_var_shader_out);
       }
    }