From daee6bc1a1af0ac472ca2766b772ae86082d2c98 Mon Sep 17 00:00:00 2001 From: Hyunjun Ko Date: Tue, 19 Mar 2019 07:17:40 +0000 Subject: [PATCH] freedreno/ir3: set dst type of alu instructions correctly. Though it should be fixed in RA pass, it needs to be set correctly from the beginning according to the bitsize of NIR dest. v2: It is better for mad, fddx and fddy to be fixed up later in the RA pass. [small cleanup of fallout from imov/fmov removal] Signed-off-by: Rob Clark --- src/freedreno/ir3/ir3_compiler_nir.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 8fbb45c..8692869 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -294,6 +294,8 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) unsigned bs[info->num_inputs]; /* bit size */ struct ir3_block *b = ctx->block; unsigned dst_sz, wrmask; + type_t dst_type = nir_dest_bit_size(alu->dest.dest) < 32 ? 
+ TYPE_U16 : TYPE_U32; if (alu->dest.dest.is_ssa) { dst_sz = alu->dest.dest.ssa.num_components; @@ -321,8 +323,8 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) src[i] = ir3_get_src(ctx, &asrc->src)[asrc->swizzle[0]]; if (!src[i]) - src[i] = create_immed(ctx->block, 0); - dst[i] = ir3_MOV(b, src[i], TYPE_U32); + src[i] = create_immed_typed(ctx->block, 0, dst_type); + dst[i] = ir3_MOV(b, src[i], dst_type); } ir3_put_dst(ctx, &alu->dest.dest); @@ -333,13 +335,12 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) * handle those specially: */ if (alu->op == nir_op_mov) { - type_t type = TYPE_U32; nir_alu_src *asrc = &alu->src[0]; struct ir3_instruction *const *src0 = ir3_get_src(ctx, &asrc->src); for (unsigned i = 0; i < dst_sz; i++) { if (wrmask & (1 << i)) { - dst[i] = ir3_MOV(b, src0[asrc->swizzle[i]], type); + dst[i] = ir3_MOV(b, src0[asrc->swizzle[i]], dst_type); } else { dst[i] = NULL; } @@ -392,6 +393,8 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) dst[0] = ir3_n2b(b, dst[0]); break; case nir_op_b2f16: + dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F16); + break; case nir_op_b2f32: dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F32); break; @@ -430,7 +433,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) (list_length(&alu->src[0].src.ssa->uses) == 1) && ((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) { src[0]->flags |= IR3_INSTR_SAT; - dst[0] = ir3_MOV(b, src[0], TYPE_U32); + dst[0] = ir3_MOV(b, src[0], dst_type); } else { /* otherwise generate a max.f that saturates.. blob does * similar (generating a cat2 mov using max.f) -- 2.7.4