From a8246927e35a49097f70cffb7fa8dd05ec1365e1 Mon Sep 17 00:00:00 2001 From: Martin Andersson Date: Tue, 2 Apr 2013 22:43:33 +0200 Subject: [PATCH] r600g: Fix UMAD on Cayman MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The multiplication part of tgsi_umad did not work on Cayman, because it did not populate the correct vector slots. This fixed hardlocks in the EXT_transform_feedback/order tests. NOTE: This is a candidate for the stable branches. (might not be easy to cherry-pick though) Signed-off-by: Marek Olšák --- src/gallium/drivers/r600/r600_shader.c | 45 ++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index a5d224f..f801707 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -5838,7 +5838,7 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; - int i, j, r; + int i, j, k, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); /* src0 * src1 */ @@ -5846,21 +5846,40 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + if (ctx->bc->chip_class == CAYMAN) { + for (j = 0 ; j < 4; j++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.dst.chan = i; - alu.dst.sel = ctx->temp_reg; - alu.dst.write = 1; + alu.op = ALU_OP2_MULLO_UINT; + for (k = 0; k < inst->Instruction.NumSrcRegs; k++) { + r600_bytecode_src(&alu.src[k], &ctx->src[k], i); + } + tgsi_dst(ctx, &inst->Dst[0], j, &alu.dst); + alu.dst.sel = ctx->temp_reg; + alu.dst.write = (j == i); + if (j == 3) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP2_MULLO_UINT; - for (j = 0; j < 2; j++) { - r600_bytecode_src(&alu.src[j], &ctx->src[j], i); - } + alu.dst.chan = i; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; - alu.last = 1; - r = r600_bytecode_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.op = ALU_OP2_MULLO_UINT; + for (j = 0; j < 2; j++) { + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); + } + + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } } -- 2.7.4