From dc04e4bba2cb3a2831a4413e8de51497cfbd2bc9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 20 Mar 2018 19:14:57 -0400 Subject: [PATCH] radeonsi: move FMASK shader logic to shared code MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We'll need it for FBFETCH in both TGSI and NIR paths. Tested-by: Dieter Nützel --- src/amd/common/ac_llvm_build.c | 56 +++++++++++++++++ src/amd/common/ac_llvm_build.h | 3 + src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 74 +---------------------- 3 files changed, 61 insertions(+), 72 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 6f577cd..32d8a02 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -2435,3 +2435,59 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, } return value; } + +/* Adjust the sample index according to FMASK. + * + * For uncompressed MSAA surfaces, FMASK should return 0x76543210, + * which is the identity mapping. Each nibble says which physical sample + * should be fetched to get that sample. + * + * For example, 0x11111100 means there are only 2 samples stored and + * the second sample covers 3/4 of the pixel. When reading samples 0 + * and 1, return physical sample 0 (determined by the first two 0s + * in FMASK), otherwise return physical sample 1. 
+ * + * The sample index should be adjusted as follows: + * addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF; + */ +void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, + LLVMValueRef *addr, bool is_array_tex) +{ + struct ac_image_args fmask_load = {}; + fmask_load.opcode = ac_image_load; + fmask_load.resource = fmask; + fmask_load.dmask = 0xf; + fmask_load.da = is_array_tex; + + LLVMValueRef fmask_addr[4]; + memcpy(fmask_addr, addr, sizeof(fmask_addr[0]) * 3); + fmask_addr[3] = LLVMGetUndef(ac->i32); /* padding; only gathered when is_array_tex */ + + fmask_load.addr = ac_build_gather_values(ac, fmask_addr, + is_array_tex ? 4 : 2); + + LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load); + fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, + ac->i32_0, ""); + + /* Apply the formula: final_sample = (fmask_value >> (sample * 4)) & 0xF. */ + unsigned sample_chan = is_array_tex ? 3 : 2; /* the sample index is last */ + LLVMValueRef final_sample; + final_sample = LLVMBuildMul(ac->builder, addr[sample_chan], + LLVMConstInt(ac->i32, 4, 0), ""); + final_sample = LLVMBuildLShr(ac->builder, fmask_value, final_sample, ""); + final_sample = LLVMBuildAnd(ac->builder, final_sample, + LLVMConstInt(ac->i32, 0xF, 0), ""); + + /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK + * resource descriptor is 0 (invalid). Element 1 of the v8i32 descriptor is compared with 0. + */ + LLVMValueRef tmp; + tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, ""); + tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, ""); + tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, ""); + + /* Replace the MSAA sample index in addr[] (in/out); keep the original index when the check above fails. 
*/ + addr[sample_chan] = LLVMBuildSelect(ac->builder, tmp, final_sample, + addr[sample_chan], ""); +} diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index f901f33..8b35028 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -414,6 +414,9 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift, unsigned bitwidth); +void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, + LLVMValueRef *addr, bool is_array_tex); + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 9e2a0eb..f5fa18f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -1506,80 +1506,10 @@ static void tex_fetch_args( for (chan = 0; chan < count; chan++) address[chan] = ac_to_integer(&ctx->ac, address[chan]); - /* Adjust the sample index according to FMASK. - * - * For uncompressed MSAA surfaces, FMASK should return 0x76543210, - * which is the identity mapping. Each nibble says which physical sample - * should be fetched to get that sample. - * - * For example, 0x11111100 means there are only 2 samples stored and - * the second sample covers 3/4 of the pixel. When reading samples 0 - * and 1, return physical sample 0 (determined by the first two 0s - * in FMASK), otherwise return physical sample 1. - * - * The sample index should be adjusted as follows: - * sample_index = (fmask >> (sample_index * 4)) & 0xF; - */ if (target == TGSI_TEXTURE_2D_MSAA || target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - struct lp_build_emit_data txf_emit_data = *emit_data; - LLVMValueRef txf_address[4]; - /* We only need .xy for non-arrays, and .xyz for arrays. */ - unsigned txf_count = target == TGSI_TEXTURE_2D_MSAA ? 
2 : 3; - struct tgsi_full_instruction inst = {}; - - memcpy(txf_address, address, sizeof(txf_address)); - - /* Read FMASK using TXF_LZ. */ - inst.Instruction.Opcode = TGSI_OPCODE_TXF_LZ; - inst.Texture.Texture = target; - txf_emit_data.inst = &inst; - txf_emit_data.chan = 0; - set_tex_fetch_args(ctx, &txf_emit_data, - target, fmask_ptr, NULL, - txf_address, txf_count, 0xf); - build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data); - - /* Initialize some constants. */ - LLVMValueRef four = LLVMConstInt(ctx->i32, 4, 0); - LLVMValueRef F = LLVMConstInt(ctx->i32, 0xF, 0); - - /* Apply the formula. */ - LLVMValueRef fmask = - LLVMBuildExtractElement(ctx->ac.builder, - txf_emit_data.output[0], - ctx->i32_0, ""); - - unsigned sample_chan = txf_count; /* the sample index is last */ - - LLVMValueRef sample_index4 = - LLVMBuildMul(ctx->ac.builder, address[sample_chan], four, ""); - - LLVMValueRef shifted_fmask = - LLVMBuildLShr(ctx->ac.builder, fmask, sample_index4, ""); - - LLVMValueRef final_sample = - LLVMBuildAnd(ctx->ac.builder, shifted_fmask, F, ""); - - /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK - * resource descriptor is 0 (invalid), - */ - LLVMValueRef fmask_desc = - LLVMBuildBitCast(ctx->ac.builder, fmask_ptr, - ctx->v8i32, ""); - - LLVMValueRef fmask_word1 = - LLVMBuildExtractElement(ctx->ac.builder, fmask_desc, - ctx->i32_1, ""); - - LLVMValueRef word1_is_nonzero = - LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, - fmask_word1, ctx->i32_0, ""); - - /* Replace the MSAA sample index. */ - address[sample_chan] = - LLVMBuildSelect(ctx->ac.builder, word1_is_nonzero, - final_sample, address[sample_chan], ""); + ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, address, + target == TGSI_TEXTURE_2D_ARRAY_MSAA); } if (opcode == TGSI_OPCODE_TXF || -- 2.7.4