From: Daniel Schürmann Date: Wed, 23 Jun 2021 13:53:43 +0000 (+0200) Subject: aco/ra: handle copies of definition registers X-Git-Tag: upstream/21.2.3~1310 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=995e218993767651c128cd89c5f90b6684fb4b50;p=platform%2Fupstream%2Fmesa.git aco/ra: handle copies of definition registers Previously, it could happen that a parallelcopy of a definition was inserted before the instruction. Fixes Rage 2 with GFX7. No fossil-db changes. Reviewed-by: Rhys Perry Part-of: --- diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index e524240..b0b8915 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -703,12 +703,32 @@ void update_renames(ra_ctx& ctx, RegisterFile& reg_file, } /* allocate id's and rename operands: this is done transparently here */ - for (std::pair<Operand, Definition>& copy : parallelcopies) { - /* the definitions with id are not from this function and already handled */ - if (copy.second.isTemp()) + auto it = parallelcopies.begin(); + while (it != parallelcopies.end()) { + if (it->second.isTemp()) { + ++it; + continue; + } + + /* check if we moved a definition: change the register and remove copy */ + bool is_def = false; + for (Definition& def : instr->definitions) { + if (def.isTemp() && def.getTemp() == it->first.getTemp()) { + // FIXME: ensure that the definition can use this reg + def.setFixed(it->second.physReg()); + reg_file.fill(def); + ctx.assignments[def.tempId()].reg = def.physReg(); + it = parallelcopies.erase(it); + is_def = true; + break; + } + } + if (is_def) continue; - /* check if we we moved another parallelcopy definition */ + std::pair<Operand, Definition>& copy = *it; + + /* check if we moved another parallelcopy definition */ for (std::pair<Operand, Definition>& other : parallelcopies) { if (!other.second.isTemp()) continue; @@ -717,7 +737,7 @@ void update_renames(ra_ctx& ctx, RegisterFile& reg_file, 
copy.first.setFixed(other.first.physReg()); } } - // FIXME: if a definition got moved, change the target location and remove the parallelcopy + copy.second.setTemp(ctx.program->allocateTmp(copy.second.regClass())); ctx.assignments.emplace_back(copy.second.physReg(), copy.second.regClass()); assert(ctx.assignments.size() == ctx.program->peekAllocationId()); @@ -754,6 +774,8 @@ void update_renames(ra_ctx& ctx, RegisterFile& reg_file, if (fill) reg_file.fill(copy.second); + + ++it; } }