From d086bb22bce6c9ed0afeb782ccd4ba6fa2561e02 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 24 Apr 2013 10:44:56 -0400 Subject: [PATCH] freedreno: fix temp register usage The previous approach of using the dst register as an intermediate temporary doesn't work in a lot of cases, for example when the dst register is the same as one of the src registers. For now, just simplify it and always allocate a new register to use as an intermediate. In some cases this will result in more registers used than required. I think the best solution would be to implement an optimization pass to reduce the number of registers used, which would also solve the problem we have now of not being able to use GPRs that are assigned for TGSI_FILE_INPUT. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/freedreno_compiler.c | 100 +++++++++++---------- 1 file changed, 52 insertions(+), 48 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_compiler.c b/src/gallium/drivers/freedreno/freedreno_compiler.c index 3d7f7c9..a26f8cf 100644 --- a/src/gallium/drivers/freedreno/freedreno_compiler.c +++ b/src/gallium/drivers/freedreno/freedreno_compiler.c @@ -57,11 +57,13 @@ struct fd_compile_context { /* Internal-Temporary and Predicate register assignment: * * Some TGSI instructions which translate into multiple actual - * instructions need one or more temporary registers (which are not + * instructions need one or more temporary registers, which are not * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY). - * Whenever possible, the dst register is used as the first temporary, - * but this is not possible when the dst register is in an export (ie. - * in TGSI_FILE_OUTPUT). + * And some instructions (texture fetch) cannot write directly to + * output registers. We could be more clever and re-use dst or a + * src register in some cases. But for now don't try to be clever. 
+ * Eventually we should implement an optimization pass that re- + * juggles the register usage and gets rid of unneeded temporaries. * * The predicate register must be valid across multiple TGSI * instructions, but internal temporary's do not. For this reason, @@ -513,6 +515,21 @@ add_regs_scalar_1(struct fd_compile_context *ctx, * Helpers for TGSI instructions that don't map to a single shader instr: */ +static void +src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) +{ + src->File = dst->File; + src->Indirect = dst->Indirect; + src->Dimension = dst->Dimension; + src->Index = dst->Index; + src->Absolute = 0; + src->Negate = 0; + src->SwizzleX = TGSI_SWIZZLE_X; + src->SwizzleY = TGSI_SWIZZLE_Y; + src->SwizzleZ = TGSI_SWIZZLE_Z; + src->SwizzleW = TGSI_SWIZZLE_W; +} + /* Get internal-temp src/dst to use for a sequence of instructions * generated by a single TGSI op.. if possible, use the final dst * register as the temporary to avoid allocating a new register, but @@ -521,44 +538,26 @@ add_regs_scalar_1(struct fd_compile_context *ctx, * so that you don't end up using the same register for all your * internal temps. 
*/ -static bool +static void get_internal_temp(struct fd_compile_context *ctx, - struct tgsi_dst_register *orig_dst, struct tgsi_dst_register *tmp_dst, struct tgsi_src_register *tmp_src) { - bool using_temp = false; + int n; tmp_dst->File = TGSI_FILE_TEMPORARY; tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; tmp_dst->Indirect = 0; tmp_dst->Dimension = 0; - if (orig_dst && (orig_dst->File != TGSI_FILE_OUTPUT)) { - /* if possible, use orig dst register for the temporary: */ - tmp_dst->Index = orig_dst->Index; - } else { - /* otherwise assign one: */ - int n = ctx->num_internal_temps++; - if (ctx->pred_reg != -1) - n++; - tmp_dst->Index = get_temp_gpr(ctx, - ctx->num_regs[TGSI_FILE_TEMPORARY] + n); - using_temp = true; - } + /* assign next temporary: */ + n = ctx->num_internal_temps++; + if (ctx->pred_reg != -1) + n++; + + tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n; - tmp_src->File = tmp_dst->File; - tmp_src->Indirect = tmp_dst->Indirect; - tmp_src->Dimension = tmp_dst->Dimension; - tmp_src->Index = tmp_dst->Index; - tmp_src->Absolute = 0; - tmp_src->Negate = 0; - tmp_src->SwizzleX = TGSI_SWIZZLE_X; - tmp_src->SwizzleY = TGSI_SWIZZLE_Y; - tmp_src->SwizzleZ = TGSI_SWIZZLE_Z; - tmp_src->SwizzleW = TGSI_SWIZZLE_W; - - return using_temp; + src_from_dst(tmp_src, tmp_dst); } static void @@ -574,12 +573,7 @@ get_predicate(struct fd_compile_context *ctx, struct tgsi_dst_register *dst, dst->Index = get_temp_gpr(ctx, ctx->pred_reg); if (src) { - src->File = dst->File; - src->Indirect = dst->Indirect; - src->Dimension = dst->Dimension; - src->Index = dst->Index; - src->Absolute = 0; - src->Negate = 0; + src_from_dst(src, dst); src->SwizzleX = TGSI_SWIZZLE_W; src->SwizzleY = TGSI_SWIZZLE_W; src->SwizzleZ = TGSI_SWIZZLE_W; @@ -717,7 +711,7 @@ translate_pow(struct fd_compile_context *ctx, struct tgsi_src_register tmp_src; struct ir2_instruction *alu; - get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src); + get_internal_temp(ctx, &tmp_dst, &tmp_src); alu = 
ir2_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP); add_regs_dummy_vector(alu); @@ -763,16 +757,25 @@ translate_tex(struct fd_compile_context *ctx, struct tgsi_full_instruction *inst, unsigned opc) { struct ir2_instruction *instr; + struct ir2_register *reg; struct tgsi_dst_register tmp_dst; struct tgsi_src_register tmp_src; const struct tgsi_src_register *coord; - bool using_temp; + bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) || + (inst->Instruction.Saturate != TGSI_SAT_NONE); int idx; - using_temp = get_internal_temp(ctx, - &inst->Dst[0].Register, &tmp_dst, &tmp_src); + if (using_temp || (opc == TGSI_OPCODE_TXP)) + get_internal_temp(ctx, &tmp_dst, &tmp_src); if (opc == TGSI_OPCODE_TXP) { + static const char *swiz[] = { + [TGSI_SWIZZLE_X] = "xxxx", + [TGSI_SWIZZLE_Y] = "yyyy", + [TGSI_SWIZZLE_Z] = "zzzz", + [TGSI_SWIZZLE_W] = "wwww", + }; + /* TXP - Projective Texture Lookup: * * coord.x = src0.x / src.w @@ -792,7 +795,8 @@ translate_tex(struct fd_compile_context *ctx, /* RECIP_IEEE: */ add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___"; - add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = "wwww"; + add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = + swiz[inst->Src[0].Register.SwizzleW]; instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_"; @@ -813,8 +817,8 @@ translate_tex(struct fd_compile_context *ctx, ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index; ctx->so->tfetch_instrs[idx].instr = instr; - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, coord); + add_dst_reg(ctx, instr, using_temp ? 
&tmp_dst : &inst->Dst[0].Register); + reg = add_src_reg(ctx, instr, coord); /* dst register needs to be marked for sync: */ ctx->need_sync |= 1 << instr->regs[0]->num; @@ -862,7 +866,7 @@ translate_sge_slt(struct fd_compile_context *ctx, break; } - get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src); + get_internal_temp(ctx, &tmp_dst, &tmp_src); instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); add_dst_reg(ctx, instr, &tmp_dst); @@ -893,8 +897,8 @@ translate_lrp(struct fd_compile_context *ctx, struct tgsi_src_register tmp_src1, tmp_src2; struct tgsi_src_register tmp_const; - get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst1, &tmp_src1); - get_internal_temp(ctx, NULL, &tmp_dst2, &tmp_src2); + get_internal_temp(ctx, &tmp_dst1, &tmp_src1); + get_internal_temp(ctx, &tmp_dst2, &tmp_src2); get_immediate(ctx, &tmp_const, fui(1.0)); @@ -945,7 +949,7 @@ translate_trig(struct fd_compile_context *ctx, break; } - get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src); + get_internal_temp(ctx, &tmp_dst, &tmp_src); tmp_dst.WriteMask = TGSI_WRITEMASK_X; tmp_src.SwizzleX = tmp_src.SwizzleY = -- 2.7.4