From d382ceea2beaf14478c986c4720898b2b36259f2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 3 Jan 2022 14:04:57 -0500 Subject: [PATCH] ac/llvm: remove the num_channels parameter from ac_build_buffer_store_dword It was used when LLVM didn't support vec3 and we had to pass vec4 with num_channels=3. We no longer need to do that. This also removes the vec3 splitting or conversion to vec4 in callers. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/llvm/ac_llvm_build.c | 17 ++++++++------- src/amd/llvm/ac_llvm_build.h | 4 ++-- src/amd/llvm/ac_nir_to_llvm.c | 9 +++----- src/amd/vulkan/radv_nir_to_llvm.c | 13 ++++-------- src/gallium/drivers/radeonsi/si_shader_llvm_gs.c | 4 ++-- src/gallium/drivers/radeonsi/si_shader_llvm_tess.c | 24 +++++++++------------- src/gallium/drivers/radeonsi/si_shader_llvm_vs.c | 11 ++-------- 7 files changed, 31 insertions(+), 51 deletions(-) diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index fb2519e..b5cef35 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -1156,15 +1156,14 @@ void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, cache_policy, true, true); } -/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. - * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), - * or v4i32 (num_channels=3,4). - */ +/* buffer_store_dword(,x2,x3,x4) <- the suffix is selected by the type of vdata. */ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, - unsigned num_channels, LLVMValueRef vindex, LLVMValueRef voffset, - LLVMValueRef soffset, unsigned inst_offset, unsigned cache_policy) + LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, + unsigned inst_offset, unsigned cache_policy) { - /* Split 3 channel stores. */ + unsigned num_channels = ac_get_llvm_num_components(vdata); + + /* Split 3 channel stores if unsupported. */ if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) { LLVMValueRef v[3], v01; @@ -1173,8 +1172,8 @@ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, } v01 = ac_build_gather_values(ctx, v, 2); - ac_build_buffer_store_dword(ctx, rsrc, v01, 2, vindex, voffset, soffset, inst_offset, cache_policy); - ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, vindex, voffset, soffset, inst_offset + 8, + ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, inst_offset, cache_policy); + ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset, soffset, inst_offset + 8, cache_policy); return; } diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 1fc9b55..45e62e2 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -254,8 +254,8 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index); void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, - unsigned num_channels, LLVMValueRef vindex, LLVMValueRef voffset, - LLVMValueRef soffset, unsigned inst_offset, unsigned cache_policy); + LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, + unsigned inst_offset, unsigned cache_policy); void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data, LLVMValueRef vindex, LLVMValueRef voffset, unsigned cache_policy); diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index ee44d55..e38b7bf 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -1806,7 +1806,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in u_bit_scan_consecutive_range(&writemask, &start, &count); - if (count == 3 && (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) { + if (count == 3 && elem_size_bytes != 4) { writemask |= 1 << (start + 2); count = 2; } @@ -1846,8 +1846,6 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in } else if (num_bytes == 2) { ac_build_tbuffer_store_short(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy); } else { - int num_channels = num_bytes / 4; - switch (num_bytes) { case 16: /* v4f32 */ data_type = ctx->ac.v4f32; @@ -1866,7 +1864,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in } data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, ""); - ac_build_buffer_store_dword(&ctx->ac, rsrc, data, num_channels, NULL, offset, + ac_build_buffer_store_dword(&ctx->ac, rsrc, data, NULL, offset, ctx->ac.i32_0, 0, cache_policy); } } @@ -4198,7 +4196,6 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins LLVMValueRef descriptor = get_src(ctx, instr->src[1]); LLVMValueRef addr_voffset = get_src(ctx, instr->src[2]); LLVMValueRef addr_soffset = get_src(ctx, instr->src[3]); - unsigned num_components = instr->src[0].ssa->num_components; unsigned const_offset = nir_intrinsic_base(instr); bool swizzled = nir_intrinsic_is_swizzled(instr); bool slc = nir_intrinsic_slc_amd(instr); @@ -4209,7 +4206,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins if (slc) cache_policy |= ac_slc; - ac_build_buffer_store_dword(&ctx->ac, descriptor, store_data, num_components, + ac_build_buffer_store_dword(&ctx->ac, descriptor, store_data, NULL, addr_voffset, addr_soffset, const_offset, cache_policy); break; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index edcdfa2..1023104 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -338,7 +338,7 @@ visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMV out_val = ac_to_integer(&ctx->ac, out_val); out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, ""); - ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, 1, NULL, voffset, + ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL, voffset, ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset), 0, ac_glc | ac_slc | ac_swizzled); } @@ -1092,18 +1092,13 @@ radv_emit_stream_output(struct radv_shader_context *ctx, LLVMValueRef const *so_ vdata = out[0]; break; case 2: /* as v2i32 */ - case 3: /* as v4i32 (aligned to 4) */ - out[3] = LLVMGetUndef(ctx->ac.i32); - FALLTHROUGH; + case 3: /* as v3i32 */ case 4: /* as v4i32 */ - vdata = ac_build_gather_values(&ctx->ac, out, - !ac_has_vec3_support(ctx->ac.chip_class, false) - ? util_next_power_of_two(num_comps) - : num_comps); + vdata = ac_build_gather_values(&ctx->ac, out, num_comps); break; } - ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf], vdata, num_comps, NULL, + ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf], vdata, NULL, so_write_offsets[buf], ctx->ac.i32_0, offset, ac_glc | ac_slc); } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index be679bc..57c2c81 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -176,7 +176,7 @@ void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi) continue; } - ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, 1, NULL, NULL, + ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, NULL, NULL, ac_get_arg(&ctx->ac, ctx->args.es2gs_offset), (4 * param + chan) * 4, ac_glc | ac_slc | ac_swizzled); } @@ -277,7 +277,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM out_val = ac_to_integer(&ctx->ac, out_val); - ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, 1, NULL, + ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL, voffset, soffset, 0, ac_glc | ac_slc | ac_swizzled); } } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index c1b4e64..d888f6b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -537,7 +537,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, values[chan] = value; if (writemask != 0xF && !is_tess_factor) { - ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, NULL, addr, base, + ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, addr, base, 4 * chan, ac_glc); } @@ -555,7 +555,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, if (writemask == 0xF && !is_tess_factor) { LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4); - ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, NULL, addr, base, 0, ac_glc); + ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, addr, base, 0, ac_glc); } } @@ -662,7 +662,7 @@ static void si_copy_tcs_inputs(struct si_shader_context *ctx) LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr); - ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, NULL, buffer_addr, buffer_offset, 0, + ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, buffer_addr, buffer_offset, 0, ac_glc); } } @@ -774,18 +774,18 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re if (ctx->screen->info.chip_class <= GFX8) { ac_build_ifcc(&ctx->ac, LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504); - ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0), 1, + ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0), NULL, ctx->ac.i32_0, tf_base, offset, ac_glc); ac_build_endif(&ctx->ac, 6504); offset += 4; } /* Store the tessellation factors. */ - ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), NULL, byteoffset, + ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, NULL, byteoffset, tf_base, offset, ac_glc); offset += 16; if (vec1) - ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, NULL, byteoffset, + ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, NULL, byteoffset, tf_base, offset, ac_glc); /* Store the tess factors into the offchip buffer if TES reads them. */ @@ -801,21 +801,17 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL, LLVMConstInt(ctx->ac.i32, param_outer, 0)); - unsigned outer_vec_size = ac_has_vec3_support(ctx->screen->info.chip_class, false) - ? outer_comps - : util_next_power_of_two(outer_comps); - outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size); + outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_comps); - ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, outer_comps, NULL, tf_outer_offset, + ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, NULL, tf_outer_offset, base, 0, ac_glc); if (inner_comps) { param_inner = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER); tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL, LLVMConstInt(ctx->ac.i32, param_inner, 0)); - inner_vec = - inner_comps == 1 ? inner[0] : ac_build_gather_values(&ctx->ac, inner, inner_comps); - ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, inner_comps, NULL, + inner_vec = ac_build_gather_values(&ctx->ac, inner, inner_comps); + ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, NULL, tf_inner_offset, base, 0, ac_glc); } } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 1fed7cb..9b77c8b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -298,19 +298,12 @@ void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef break; case 2: /* as v2i32 */ case 3: /* as v3i32 */ - if (ac_has_vec3_support(ctx->screen->info.chip_class, false)) { - vdata = ac_build_gather_values(&ctx->ac, out, num_comps); - break; - } - /* as v4i32 (aligned to 4) */ - out[3] = LLVMGetUndef(ctx->ac.i32); - FALLTHROUGH; case 4: /* as v4i32 */ - vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps)); + vdata = ac_build_gather_values(&ctx->ac, out, num_comps); break; } - ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx], vdata, num_comps, NULL, + ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx], vdata, NULL, so_write_offsets[buf_idx], ctx->ac.i32_0, stream_out->dst_offset * 4, ac_glc | ac_slc); } -- 2.7.4