From bb17ae49ee2591a4a35479ed6e48cb3c18422e2a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 27 Aug 2018 02:03:41 +0100 Subject: [PATCH] gallivm: allow to pass two swizzles into fetches. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This hijacks the top 16-bits of swizzle, to pass in the swizzle for the second channel. This fixes handling .yx swizzles of 64-bit values. This should fixup radeonsi and llvmpipe. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107524 Reviewed-by: Marek Olšák --- src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 9 +++ src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 86 ++++++++++++++-------- src/gallium/drivers/radeonsi/si_shader.c | 7 +- .../drivers/radeonsi/si_shader_tgsi_setup.c | 18 +++-- 4 files changed, 79 insertions(+), 41 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index 64d2cd7..2c3be8f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -353,6 +353,15 @@ lp_build_emit_fetch_src( assert(0 && "invalid swizzle in emit_fetch()"); return bld_base->base.undef; } + if (tgsi_type_is_64bit(stype)) { + unsigned swizzle2; + swizzle2 = tgsi_util_get_full_src_register_swizzle(reg, chan_index + 1); + if (swizzle2 > 3) { + assert(0 && "invalid swizzle in emit_fetch()"); + return bld_base->base.undef; + } + swizzle |= (swizzle2 << 16); + } } assert(reg->Register.Index <= bld_base->info->file_max[reg->Register.File]); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 83d7dbe..79ece63 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1190,7 +1190,7 @@ emit_fetch_constant( struct lp_build_tgsi_context * bld_base, const struct tgsi_full_src_register * reg, enum tgsi_opcode_type stype, - unsigned swizzle) + unsigned swizzle_in) { 
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; @@ -1200,6 +1200,7 @@ emit_fetch_constant( LLVMValueRef consts_ptr; LLVMValueRef num_consts; LLVMValueRef res; + unsigned swizzle = swizzle_in & 0xffff; /* XXX: Handle fetching xyzw components as a vector */ assert(swizzle != ~0u); @@ -1241,7 +1242,7 @@ emit_fetch_constant( if (tgsi_type_is_64bit(stype)) { LLVMValueRef swizzle_vec2; - swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1); + swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16); index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2); index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2); } @@ -1256,21 +1257,42 @@ emit_fetch_constant( scalar_ptr = LLVMBuildGEP(builder, consts_ptr, &index, 1, ""); - if (stype == TGSI_TYPE_DOUBLE) { - LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0); - scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, ""); - bld_broad = &bld_base->dbl_bld; - } else if (stype == TGSI_TYPE_UNSIGNED64) { - LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0); - scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, ""); - bld_broad = &bld_base->uint64_bld; - } else if (stype == TGSI_TYPE_SIGNED64) { - LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0); - scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, ""); - bld_broad = &bld_base->int64_bld; + + if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) { + + LLVMValueRef scalar2, scalar2_ptr; + LLVMValueRef shuffles[2]; + index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16)); + + scalar2_ptr = LLVMBuildGEP(builder, consts_ptr, + &index, 1, ""); + + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + scalar2 = LLVMBuildLoad(builder, scalar2_ptr, ""); + shuffles[0] = 
lp_build_const_int32(gallivm, 0); + shuffles[1] = lp_build_const_int32(gallivm, 1); + + res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2)); + res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], ""); + res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], ""); + } else { + if (stype == TGSI_TYPE_DOUBLE) { + LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0); + scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, ""); + bld_broad = &bld_base->dbl_bld; + } else if (stype == TGSI_TYPE_UNSIGNED64) { + LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0); + scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, ""); + bld_broad = &bld_base->uint64_bld; + } else if (stype == TGSI_TYPE_SIGNED64) { + LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0); + scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, ""); + bld_broad = &bld_base->int64_bld; + } + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + res = lp_build_broadcast_scalar(bld_broad, scalar); } - scalar = LLVMBuildLoad(builder, scalar_ptr, ""); - res = lp_build_broadcast_scalar(bld_broad, scalar); + } if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) { @@ -1319,12 +1341,13 @@ emit_fetch_immediate( struct lp_build_tgsi_context * bld_base, const struct tgsi_full_src_register * reg, enum tgsi_opcode_type stype, - unsigned swizzle) + unsigned swizzle_in) { struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); struct gallivm_state *gallivm = bld->bld_base.base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef res = NULL; + unsigned swizzle = swizzle_in & 0xffff; if (bld->use_immediates_array || reg->Register.Indirect) { LLVMValueRef imms_array; @@ -1355,7 +1378,7 @@ 
emit_fetch_immediate( if (tgsi_type_is_64bit(stype)) index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, indirect_index, - swizzle + 1, + swizzle_in >> 16, FALSE); /* Gather values from the immediate register array */ res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2); @@ -1371,7 +1394,7 @@ emit_fetch_immediate( LLVMValueRef imms_ptr2; LLVMValueRef res2; gep[1] = lp_build_const_int32(gallivm, - reg->Register.Index * 4 + swizzle + 1); + reg->Register.Index * 4 + (swizzle_in >> 16)); imms_ptr2 = LLVMBuildGEP(builder, bld->imms_array, gep, 2, ""); res2 = LLVMBuildLoad(builder, imms_ptr2, ""); @@ -1382,7 +1405,7 @@ emit_fetch_immediate( else { res = bld->immediates[reg->Register.Index][swizzle]; if (tgsi_type_is_64bit(stype)) - res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]); + res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]); } if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) { @@ -1397,12 +1420,13 @@ emit_fetch_input( struct lp_build_tgsi_context * bld_base, const struct tgsi_full_src_register * reg, enum tgsi_opcode_type stype, - unsigned swizzle) + unsigned swizzle_in) { struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); struct gallivm_state *gallivm = bld->bld_base.base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef res; + unsigned swizzle = swizzle_in & 0xffff; if (reg->Register.Indirect) { LLVMValueRef indirect_index; @@ -1423,7 +1447,7 @@ emit_fetch_input( if (tgsi_type_is_64bit(stype)) { index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, indirect_index, - swizzle + 1, + swizzle_in >> 16, TRUE); } /* cast inputs_array pointer to float* */ @@ -1446,7 +1470,7 @@ emit_fetch_input( LLVMValueRef res2; lindex1 = lp_build_const_int32(gallivm, - reg->Register.Index * 4 + swizzle + 1); + reg->Register.Index * 4 + (swizzle_in >> 16)); input_ptr2 = LLVMBuildGEP(builder, 
bld->inputs_array, &lindex1, 1, ""); res2 = LLVMBuildLoad(builder, input_ptr2, ""); @@ -1456,7 +1480,7 @@ emit_fetch_input( else { res = bld->inputs[reg->Register.Index][swizzle]; if (tgsi_type_is_64bit(stype)) - res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]); + res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]); } } @@ -1476,7 +1500,7 @@ emit_fetch_gs_input( struct lp_build_tgsi_context * bld_base, const struct tgsi_full_src_register * reg, enum tgsi_opcode_type stype, - unsigned swizzle) + unsigned swizzle_in) { struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); struct gallivm_state *gallivm = bld->bld_base.base.gallivm; @@ -1484,6 +1508,7 @@ emit_fetch_gs_input( LLVMBuilderRef builder = gallivm->builder; LLVMValueRef attrib_index = NULL; LLVMValueRef vertex_index = NULL; + unsigned swizzle = swizzle_in & 0xffff; LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle); LLVMValueRef res; @@ -1525,7 +1550,7 @@ emit_fetch_gs_input( assert(res); if (tgsi_type_is_64bit(stype)) { - LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1); + LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16); LLVMValueRef res2; res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base, reg->Dimension.Indirect, @@ -1549,12 +1574,13 @@ emit_fetch_temporary( struct lp_build_tgsi_context * bld_base, const struct tgsi_full_src_register * reg, enum tgsi_opcode_type stype, - unsigned swizzle) + unsigned swizzle_in) { struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); struct gallivm_state *gallivm = bld->bld_base.base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef res; + unsigned swizzle = swizzle_in & 0xffff; if (reg->Register.Indirect) { LLVMValueRef indirect_index; @@ -1574,7 +1600,7 @@ emit_fetch_temporary( if (tgsi_type_is_64bit(stype)) { index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, 
indirect_index, - swizzle + 1, + swizzle_in >> 16, TRUE); } @@ -1593,7 +1619,7 @@ emit_fetch_temporary( if (tgsi_type_is_64bit(stype)) { LLVMValueRef temp_ptr2, res2; - temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1); + temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16); res2 = LLVMBuildLoad(builder, temp_ptr2, ""); res = emit_fetch_64bit(bld_base, stype, res, res2); } @@ -1616,7 +1642,7 @@ emit_fetch_system_value( struct lp_build_tgsi_context * bld_base, const struct tgsi_full_src_register * reg, enum tgsi_opcode_type stype, - unsigned swizzle) + unsigned swizzle_in) { struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); struct gallivm_state *gallivm = bld->bld_base.base.gallivm; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c51e91b..d8930bf 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2397,16 +2397,17 @@ static LLVMValueRef fetch_constant( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, - unsigned swizzle) + unsigned swizzle_in) { struct si_shader_context *ctx = si_shader_context(bld_base); struct si_shader_selector *sel = ctx->shader->selector; const struct tgsi_ind_register *ireg = &reg->Indirect; unsigned buf, idx; + unsigned swizzle = swizzle_in & 0xffff; LLVMValueRef addr, bufp; - if (swizzle == LP_CHAN_ALL) { + if (swizzle_in == LP_CHAN_ALL) { unsigned chan; LLVMValueRef values[4]; for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) @@ -2420,7 +2421,7 @@ static LLVMValueRef fetch_constant( LLVMValueRef lo, hi; lo = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle); - hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle + 1); + hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, (swizzle_in >> 16)); return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), lo, hi); } diff --git
a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 2016493..d48eda1 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -445,13 +445,14 @@ get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index, LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, - unsigned swizzle) + unsigned swizzle_in) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef result = NULL, ptr, ptr2; + unsigned swizzle = swizzle_in & 0xffff; - if (swizzle == ~0) { + if (swizzle_in == ~0) { LLVMValueRef values[TGSI_NUM_CHANNELS]; unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { @@ -476,7 +477,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctx->i32_0); result = LLVMConstInsertElement(result, - ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1], + ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)], ctx->i32_1); return LLVMConstBitCast(result, ctype); } else { @@ -503,7 +504,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, if (tgsi_type_is_64bit(type)) { ptr = result; - ptr2 = input[swizzle + 1]; + ptr2 = input[swizzle_in >> 16]; return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), ptr, ptr2); } @@ -515,7 +516,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle]; if (tgsi_type_is_64bit(type)) { - ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1]; + ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)]; return 
si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), LLVMBuildLoad(builder, ptr, ""), LLVMBuildLoad(builder, ptr2, "")); @@ -526,7 +527,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, case TGSI_FILE_OUTPUT: ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle); if (tgsi_type_is_64bit(type)) { - ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1); + ptr2 = get_output_ptr(bld_base, reg->Register.Index, (swizzle_in >> 16)); return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), LLVMBuildLoad(builder, ptr, ""), LLVMBuildLoad(builder, ptr2, "")); @@ -544,11 +545,12 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, - unsigned swizzle) + unsigned swizzle_in) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef cval = ctx->system_values[reg->Register.Index]; + unsigned swizzle = swizzle_in & 0xffff; if (tgsi_type_is_64bit(type)) { LLVMValueRef lo, hi; @@ -558,7 +560,7 @@ static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base, lo = LLVMBuildExtractElement( builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), ""); hi = LLVMBuildExtractElement( - builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), ""); + builder, cval, LLVMConstInt(ctx->i32, (swizzle_in >> 16), 0), ""); return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), lo, hi); -- 2.7.4