From 88fd397c741c0e1fe0d851fbc566925078df6013 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Pavel=20Ondra=C4=8Dka?= Date: Wed, 10 Aug 2022 09:17:56 +0200 Subject: [PATCH] r300: fix variables detection for paired ALU and TEX instructions in different branches MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit TEX instrutions can't write xyz and w to separate registers so we need to create variables from them first, otherwise we can create two variables from ALU writing the same register xyz and w in other branch (this usually works when TEX is not present as the xyz and w can read/write from different registers). This fixes regalloc because the variables are later used as a graph nodes. The variable order should not matter but it slightly does (leading to approx 0.3% shader-db temps increase as compared to previous state), so just sort the variables list afterwards to be as close to the previous behavior as possible and prevent the regression. CC: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6936 Signed-off-by: Pavel Ondračka Reviewed-by: Filip Gawin Part-of: --- .../drivers/r300/compiler/radeon_variable.c | 91 ++++++++++++++++++++-- 1 file changed, 86 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.c b/src/gallium/drivers/r300/compiler/radeon_variable.c index c021f5c..4c276a4 100644 --- a/src/gallium/drivers/r300/compiler/radeon_variable.c +++ b/src/gallium/drivers/r300/compiler/radeon_variable.c @@ -26,6 +26,7 @@ */ #include +#include #include "radeon_variable.h" #include "memory_pool.h" @@ -326,6 +327,31 @@ static void get_variable_pair_helper( } /** + * Compare function for sorting variable pointers by the lowest instruction + * IP from it and its friends. + */ +static int cmpfunc_variable_by_ip (const void * a, const void * b) { + struct rc_variable * var_a = *(struct rc_variable **)a; + struct rc_variable * var_b = *(struct rc_variable **)b; + unsigned int min_ip_a = var_a->Inst->IP; + unsigned int min_ip_b = var_b->Inst->IP; + + /* Find the minimal IP of a variable and its friends */ + while (var_a->Friend) { + var_a = var_a->Friend; + if (var_a->Inst->IP < min_ip_a) + min_ip_a = var_a->Inst->IP; + } + while (var_b->Friend) { + var_b = var_b->Friend; + if (var_b->Inst->IP < min_ip_b) + min_ip_b = var_b->Inst->IP; + } + + return (int)min_ip_a - (int)min_ip_b; +} + +/** * Generate a list of variables used by the shader program. Each instruction * that writes to a register is considered a variable. The struct rc_variable * data structure includes a list of readers and is essentially a @@ -337,14 +363,42 @@ struct rc_list * rc_get_variables(struct radeon_compiler * c) struct rc_instruction * inst; struct rc_list * variable_list = NULL; + /* We search for the variables in two loops in order to get it right in + * the following specific case + * + * IF aluresult.x___; + * ... + * MAD temp[0].xyz, src0.000, src0.111, src0.000 + * MAD temp[0].w, src0.0, src0.1, src0.0 + * ELSE; + * ... + * TXB temp[0], temp[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE; + * ENDIF; + * src0.xyz = input[0], src0.w = input[0], src1.xyz = temp[0], src1.w = temp[0] SEM_WAIT + * MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 + * MAD temp[1].w, src0.w, src1.w, src0.0 + * + * If we go just in one loop, we will first create two variables for the + * temp[0].xyz and temp[0].w. This happens because they don't share a reader + * as the src1.xyz and src1.w of the instruction where the value is used are + * in theory independent. They are not because the same register is written + * also by the texture instruction in the other branch and TEX can't write xyz + * and w separatelly. + * + * Therefore first search for RC_INSTRUCTION_NORMAL to create variables from + * the texture instruction and than the pair instructions will be properly + * marked as friends. So we will end with only one variable here as we should. + * + * This doesn't matter before the pair translation, because everything is + * RC_INSTRUCTION_NORMAL. + */ for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - struct rc_reader_data reader_data; - struct rc_variable * new_var; - memset(&reader_data, 0, sizeof(reader_data)); - if (inst->Type == RC_INSTRUCTION_NORMAL) { + struct rc_reader_data reader_data; + struct rc_variable * new_var; + memset(&reader_data, 0, sizeof(reader_data)); rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); if (reader_data.ReaderCount == 0) { continue; @@ -353,7 +407,15 @@ struct rc_list * rc_get_variables(struct radeon_compiler * c) inst->U.I.DstReg.Index, inst->U.I.DstReg.WriteMask, &reader_data); get_variable_helper(&variable_list, new_var); - } else { + } + } + + bool needs_sorting = false; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + if (inst->Type != RC_INSTRUCTION_NORMAL) { + needs_sorting = true; get_variable_pair_helper(&variable_list, c, inst, &inst->U.P.RGB); get_variable_pair_helper(&variable_list, c, inst, @@ -361,6 +423,25 @@ struct rc_list * rc_get_variables(struct radeon_compiler * c) } } + if (variable_list && needs_sorting) { + unsigned int count = rc_list_count(variable_list); + struct rc_variable **variables = memory_pool_malloc(&c->Pool, + sizeof(struct rc_variable *) * count); + + struct rc_list * current = variable_list; + for(unsigned int i = 0; current; i++, current = current->Next) { + struct rc_variable * var = current->Item; + variables[i] = var; + } + + qsort(variables, count, sizeof(struct rc_variable *), cmpfunc_variable_by_ip); + + current = variable_list; + for(unsigned int i = 0; current; i++, current = current->Next) { + current->Item = variables[i]; + } + } + return variable_list; } -- 2.7.4