From 10938cddd7a19086f3b4bbafd8ce2a555fd4f7fb Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 6 Jul 2023 16:23:21 +0200 Subject: [PATCH] r600/sfn: Switch to register intrinsics Signed-off-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp | 2 +- src/gallium/drivers/r600/sfn/sfn_nir.cpp | 4 +- src/gallium/drivers/r600/sfn/sfn_optimizer.cpp | 41 ++++- src/gallium/drivers/r600/sfn/sfn_shader.cpp | 151 +++++++++++++++-- src/gallium/drivers/r600/sfn/sfn_shader.h | 8 +- src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp | 182 +++++++-------------- src/gallium/drivers/r600/sfn/sfn_valuefactory.h | 9 +- src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp | 3 +- .../drivers/r600/sfn/tests/sfn_test_shaders.cpp | 80 ++++----- 9 files changed, 291 insertions(+), 189 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index f7697c0..991f5fb 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -503,7 +503,7 @@ bool AluInstr::can_replace_source(PRegister old_src, PVirtualValue new_src) /* If the old or new source is an array element, we assume that there * might have been an (untracked) indirect access, so don't replace * this source */ - if (old_src->pin() == pin_array || new_src->pin() == pin_array) + if (old_src->pin() == pin_array && new_src->pin() == pin_array) return false; auto [addr, dummy, index] = indirect_addr(); diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index 115b1df..8c34900 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -934,8 +934,8 @@ r600_shader_from_nir(struct r600_context *rctx, NIR_PASS_V(sh, nir_lower_bool_to_int32); - NIR_PASS_V(sh, nir_lower_locals_to_regs, 32); - NIR_PASS_V(sh, nir_convert_from_ssa, true, false); + NIR_PASS_V(sh, nir_lower_locals_to_reg_intrinsics, 32); + NIR_PASS_V(sh, nir_convert_from_ssa, true, true); NIR_PASS_V(sh, nir_opt_dce); if (rctx->screen->b.debug_flags & DBG_ALL_SHADERS) { diff --git a/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp index eff6007..986019a 100644 --- a/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp @@ -373,14 +373,18 @@ CopyPropFwdVisitor::visit(AluInstr *instr) auto ii = dest->uses().begin(); auto ie = dest->uses().end(); + auto mov_block_id = instr->block_id(); + while(ii != ie) { auto i = *ii; + auto target_block_id = i->block_id(); + ++ii; /* SSA can always be propagated, registers only in the same block * and only if they are assigned in the same block */ - bool can_propagate = dest->has_flag(Register::ssa); + bool dest_can_propagate = dest->has_flag(Register::ssa); - if (!can_propagate) { + if (!dest_can_propagate) { /* Register can propagate if the assignment was in the same * block, and we don't have a second assignment coming later @@ -391,20 +395,45 @@ CopyPropFwdVisitor::visit(AluInstr *instr) * 3: MOV SN.x, R0.x * * Here we can't prpagate the move in 1 to SN.x in 3 */ - if ((instr->block_id() == i->block_id() && instr->index() < i->index())) { - can_propagate = true; + if ((mov_block_id == target_block_id && instr->index() < i->index())) { + dest_can_propagate = true; if (dest->parents().size() > 1) { for (auto p : dest->parents()) { if (p->block_id() == i->block_id() && p->index() > instr->index()) { - can_propagate = false; + dest_can_propagate = false; + break; + } + } + } + } + } + + bool src_can_propagate = false; + if (auto rsrc = src->as_register()) { + if (rsrc->has_flag(Register::ssa)) { + src_can_propagate = true; + } else if (mov_block_id == target_block_id) { + if (rsrc->addr()) { + if (i->block_id() == mov_block_id && + i->index() == instr->index() + 1) + src_can_propagate = true; + } else { + src_can_propagate = true; + for (auto p : rsrc->parents()) { + if (p->block_id() == mov_block_id && + p->index() > instr->index() && + p->index() < i->index()) { + src_can_propagate = false; break; } } } } + } else { + src_can_propagate = true; } - if (can_propagate) { + if (dest_can_propagate && src_can_propagate) { sfn_log << SfnLog::opt << " Try replace in " << i->block_id() << ":" << i->index() << *i << "\n"; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp index 2e33622..859c880 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp @@ -552,7 +552,8 @@ Shader::process(nir_shader *nir) allocate_reserved_registers(); - allocate_local_registers(&func->impl->registers); + value_factory().allocate_registers(m_register_allocations); + m_required_registers = value_factory().array_registers(); sfn_log << SfnLog::trans << "Process shader \n"; foreach_list_typed(nir_cf_node, node, node, &func->impl->body) @@ -566,14 +567,6 @@ Shader::process(nir_shader *nir) return true; } -void -Shader::allocate_local_registers(const exec_list *registers) -{ - if (value_factory().allocate_registers(registers)) - m_indirect_files |= 1 << TGSI_FILE_TEMPORARY; - m_required_registers = value_factory().array_registers(); -} - bool Shader::scan_shader(const nir_function *func) { @@ -682,6 +675,9 @@ Shader::scan_instruction(nir_instr *instr) (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image) && nir_intrinsic_memory_scope(intr) != SCOPE_NONE); break; + case nir_intrinsic_decl_reg: + m_register_allocations.push_back(intr); + break; default:; } return true; @@ -918,7 +914,18 @@ Shader::process_intrinsic(nir_intrinsic_instr *intr) return emit_atomic_local_shared(intr); case nir_intrinsic_shader_clock: return emit_shader_clock(intr); - + case nir_intrinsic_load_reg: + return emit_load_reg(intr); + case nir_intrinsic_load_reg_indirect: + return emit_load_reg_indirect(intr); + case nir_intrinsic_store_reg: + return emit_store_reg(intr); + case nir_intrinsic_store_reg_indirect: + return emit_store_reg_indirect(intr); + case nir_intrinsic_decl_reg: + // Registers and arrays are allocated at + // conversion startup time + return true; default: return false; } @@ -966,6 +973,130 @@ Shader::emit_load_to_register(PVirtualValue src) return dest; } +// add visitor to resolve array and register +class RegisterAccessHandler : public RegisterVisitor { + +public: + RegisterAccessHandler(Shader& shader, nir_intrinsic_instr *intr); + + void visit(LocalArrayValue& value) override {(void)value; assert(0);} + void visit(UniformValue& value) override {(void)value; assert(0);} + void visit(LiteralConstant& value) override {(void)value; assert(0);} + void visit(InlineConstant& value) override {(void)value; assert(0);} + + Shader& sh; + nir_intrinsic_instr *ir; + PVirtualValue addr{nullptr}; + bool success{true}; +}; + +class RegisterReadHandler : public RegisterAccessHandler { + +public: + using RegisterAccessHandler::RegisterAccessHandler; + using RegisterAccessHandler::visit; + + void visit(LocalArray& value) override; + void visit(Register& value) override; +}; + +bool Shader::emit_load_reg(nir_intrinsic_instr *intr) +{ + RegisterReadHandler visitor(*this, intr); + auto handle = value_factory().src(intr->src[0], 0); + handle->accept(visitor); + return visitor.success; +} + +bool Shader::emit_load_reg_indirect(nir_intrinsic_instr *intr) +{ + RegisterReadHandler visitor(*this, intr); + visitor.addr = value_factory().src(intr->src[1], 0); + auto handle = value_factory().src(intr->src[0], 0); + handle->accept(visitor); + return visitor.success; +} + +class RegisterWriteHandler : public RegisterAccessHandler { + +public: + using RegisterAccessHandler::RegisterAccessHandler; + using RegisterAccessHandler::visit; + + void visit(LocalArray& value) override; + void visit(Register& value) override; +}; + + +bool Shader::emit_store_reg(nir_intrinsic_instr *intr) +{ + RegisterWriteHandler visitor(*this, intr); + auto handle = value_factory().src(intr->src[1], 0); + handle->accept(visitor); + return visitor.success; +} + +bool Shader::emit_store_reg_indirect(nir_intrinsic_instr *intr) +{ + RegisterWriteHandler visitor(*this, intr); + visitor.addr = value_factory().src(intr->src[2], 0); + + auto handle = value_factory().src(intr->src[1], 0); + handle->accept(visitor); + return visitor.success; +} + +RegisterAccessHandler::RegisterAccessHandler(Shader& shader, nir_intrinsic_instr *intr): + sh(shader), + ir(intr) +{} + +void RegisterReadHandler::visit(LocalArray& array) +{ + int slots = ir->dest.ssa.bit_size / 32; + auto pin = ir->dest.ssa.num_components > 1 ? pin_none : pin_free; + for (int i = 0; i < ir->dest.ssa.num_components; ++i) { + for (int s = 0; s < slots; ++s) { + int chan = i * slots + s; + auto dest = sh.value_factory().dest(ir->dest, chan, pin); + auto src = array.element(nir_intrinsic_base(ir), addr, chan); + sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write)); + } + } +} + +void RegisterReadHandler::visit(Register& reg) +{ + auto dest = sh.value_factory().dest(ir->dest, 0, pin_free); + sh.emit_instruction(new AluInstr(op1_mov, dest, ®, AluInstr::write)); +} + +void RegisterWriteHandler::visit(LocalArray& array) +{ + int writemask = nir_intrinsic_write_mask(ir); + int slots = ir->src->ssa->bit_size / 32; + + for (int i = 0; i < ir->num_components; ++i) { + if (!(writemask & (1 << i))) + continue; + for (int s = 0; s < slots; ++s) { + int chan = i * slots + s; + + auto dest = array.element(nir_intrinsic_base(ir), addr, chan); + auto src = sh.value_factory().src(ir->src[0], chan); + sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write)); + } + } +} + +void RegisterWriteHandler::visit(Register& dest) +{ + int writemask = nir_intrinsic_write_mask(ir); + assert(writemask == 1); + auto src = sh.value_factory().src(ir->src[0], 0); + sh.emit_instruction(new AluInstr(op1_mov, &dest, src, AluInstr::write)); +} + bool Shader::emit_atomic_local_shared(nir_intrinsic_instr *instr) { diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.h b/src/gallium/drivers/r600/sfn/sfn_shader.h index 1204291..236a62b 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader.h @@ -298,8 +298,6 @@ private: bool scan_uniforms(nir_variable *uniform); void allocate_reserved_registers(); - void allocate_local_registers(const exec_list *registers); - virtual int do_allocate_reserved_registers() = 0; bool scan_instruction(nir_instr *instr); @@ -323,6 +321,10 @@ private: bool emit_shader_clock(nir_intrinsic_instr *instr); bool emit_wait_ack(); bool emit_scoped_barrier(nir_intrinsic_instr *instr); + bool emit_load_reg(nir_intrinsic_instr *intr); + bool emit_load_reg_indirect(nir_intrinsic_instr *intr); + bool emit_store_reg(nir_intrinsic_instr *intr); + bool emit_store_reg_indirect(nir_intrinsic_instr *intr); bool equal_to(const Shader& other) const; void finalize(); @@ -400,6 +402,8 @@ private: InstructionChain m_chain_instr; std::list> m_loops; int m_control_flow_depth{0}; + std::list m_register_allocations; + }; std::pair diff --git a/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp index ce12ff7..d9af52a 100644 --- a/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp @@ -53,37 +53,42 @@ ValueFactory::set_virtual_register_base(int base) } bool -ValueFactory::allocate_registers(const exec_list *registers) +ValueFactory::allocate_registers(const std::list& regs) { - bool has_arrays = false; struct array_entry { unsigned index; unsigned length; - unsigned ncomponents; + int ncomponents; bool operator()(const array_entry& a, const array_entry& b) const { - return a.length < b.length || - (a.length == b.length && a.ncomponents > b.ncomponents); + return a.ncomponents < b.ncomponents || + (a.ncomponents == b.ncomponents && a.length < b.length); } }; using array_list = std::priority_queue, array_entry>; + std::list non_array; array_list arrays; + for(auto intr : regs) { + unsigned num_elms = nir_intrinsic_num_array_elems(intr); + int num_comp = nir_intrinsic_num_components(intr); + int bit_size = nir_intrinsic_bit_size(intr); - foreach_list_typed(nir_register, reg, node, registers) - { - if (reg->num_array_elems) { + if (num_elms > 0 || num_comp > 1 || bit_size > 32) { array_entry ae = { - reg->index, reg->num_array_elems, reg->bit_size / 32 * reg->num_components}; + intr->dest.ssa.index, + num_elms ? num_elms : 1, + bit_size / 32 * num_comp}; arrays.push(ae); - has_arrays = true; + } else { + non_array.push_back(intr->dest.ssa.index); } } - int ncomponents = 0; + int free_components = 4; int sel = m_next_register_index; unsigned length = 0; @@ -94,48 +99,39 @@ ValueFactory::allocate_registers(const exec_list *registers) /* This is a bit hackish, return an id that encodes the array merge. To * make sure that the mapping doesn't go wrong we have to make sure the * arrays is longer than the number of instances in this arrays slot */ - if (a.ncomponents + ncomponents > 4 || a.length > length) { + if (a.ncomponents > free_components || a.length > length) { sel = m_next_register_index; - ncomponents = 0; - length = 0; + free_components = 4; + m_next_register_index += a.length; } - if (ncomponents == 0) - m_next_register_index += a.length; + uint32_t frac = free_components - a.ncomponents; - uint32_t frac = ncomponents; auto array = new LocalArray(sel, a.ncomponents, a.length, frac); - for (unsigned i = 0; i < a.ncomponents; ++i) { + for (int i = 0; i < a.ncomponents; ++i) { RegisterKey key(a.index, i, vp_array); - m_channel_counts.inc_count(i); + m_channel_counts.inc_count(frac + i, a.length); m_registers[key] = array; sfn_log << SfnLog::reg << __func__ << ": Allocate array " << key << ":" << *array << "\n"; } - ncomponents += a.ncomponents; + free_components -= a.ncomponents; length = a.length; } m_required_array_registers = m_next_register_index ? m_next_register_index : 0; - foreach_list_typed(nir_register, reg, node, registers) - { - if (!reg->num_array_elems) { - uint32_t sel = m_next_register_index++; - unsigned num_components = reg->num_components * reg->bit_size / 32; - for (auto chan = 0u; chan < num_components; ++chan) { - RegisterKey key(reg->index, chan, vp_register); - m_channel_counts.inc_count(chan); - m_registers[key] = - new Register(sel, chan, num_components > 1 ? pin_none : pin_free); - sfn_log << SfnLog::reg << "allocate register " << key << ":" - << *m_registers[key] << "\n"; - } - } + for (auto index : non_array) { + RegisterKey key(index, 0, vp_register); + auto chan = m_channel_counts.least_used(0xf); + m_registers[key] = new Register(m_next_register_index++, + chan, pin_free); + m_channel_counts.inc_count(chan); } - return has_arrays; + + return true; } int ValueFactory::new_register_index() @@ -213,43 +209,10 @@ public: }; PRegister -ValueFactory::resolve_array(nir_register *reg, - nir_src *indirect, - int base_offset, - int chan) -{ - PVirtualValue addr = nullptr; - auto type = reg->num_array_elems ? vp_array : vp_register; - RegisterKey key(reg->index, chan, type); - auto ireg = m_registers.find(key); - if (ireg == m_registers.end()) { - std::cerr << "Key " << key << " not found\n"; - assert(0); - } - - if (reg->num_array_elems) { - - if (indirect) - addr = src(*indirect, 0); - - TranslateRegister array_resolution(base_offset, addr, chan); - - ireg->second->accept(array_resolution); - assert(array_resolution.m_value); - return array_resolution.m_value; - } else { - return ireg->second; - } -} - -PRegister ValueFactory::dest(const nir_dest& dst, int chan, Pin pin_channel, uint8_t chan_mask) { - if (dst.is_ssa) { - return dest(dst.ssa, chan, pin_channel, chan_mask); - } else { - return resolve_array(dst.reg.reg, dst.reg.indirect, dst.reg.base_offset, chan); - } + assert(dst.is_ssa); + return dest(dst.ssa, chan, pin_channel, chan_mask); } void @@ -320,30 +283,12 @@ ValueFactory::dest_vec4(const nir_dest& dst, Pin pin) { if (pin != pin_group && pin != pin_chgr) pin = pin_chan; - if (dst.is_ssa) { - PRegister x = dest(dst, 0, pin); - PRegister y = dest(dst, 1, pin); - PRegister z = dest(dst, 2, pin); - PRegister w = dest(dst, 3, pin); - return RegisterVec4(x, y, z, w, pin); - } else { - assert(!dst.reg.indirect); - PRegister v[4]; - int sel = -1; - for (int i = 0; i < 4; ++i) { - RegisterKey key(dst.reg.reg->index, i, vp_register); - v[i] = m_registers[key]; - assert(sel >= 0 || v[i]); - if (sel < 0) - sel = v[i]->sel(); - - if (!v[i]) { - v[i] = m_registers[key] = new Register(sel, i, pin_group); - } - } - return RegisterVec4(v[0], v[1], v[2], v[3], pin); - } - unreachable("unsupported"); + assert(dst.is_ssa); + PRegister x = dest(dst, 0, pin); + PRegister y = dest(dst, 1, pin); + PRegister z = dest(dst, 2, pin); + PRegister w = dest(dst, 3, pin); + return RegisterVec4(x, y, z, w, pin); } PRegister ValueFactory::addr() @@ -386,16 +331,12 @@ ValueFactory::src(const nir_src& src, int chan) { sfn_log << SfnLog::reg << "search (ref) " << (void *)&src << "\n"; - if (src.is_ssa) { - sfn_log << SfnLog::reg << "search ssa " << src.ssa->index << " c:" << chan - << " got "; - auto val = ssa_src(*src.ssa, chan); - sfn_log << *val << "\n"; - return val; - } else { - sfn_log << SfnLog::reg << "search reg " << src.reg.reg->index << "\n"; - return local_register(src.reg, chan); - } + assert(src.is_ssa); + sfn_log << SfnLog::reg << "search ssa " << src.ssa->index << " c:" << chan + << " got "; + auto val = ssa_src(*src.ssa, chan); + sfn_log << *val << "\n"; + return val; } PVirtualValue @@ -486,20 +427,21 @@ ValueFactory::ssa_src(const nir_ssa_def& ssa, int chan) if (ival != m_values.end()) return ival->second; - std::cerr << "Didn't find source with key " << key << "\n"; - unreachable("Source values should always exist"); -} + RegisterKey rkey(ssa.index, chan, vp_register); + sfn_log << SfnLog::reg << "search src with key" << rkey << "\n"; -PRegister -ValueFactory::local_register(const nir_register_dest& dst, int chan) -{ - return resolve_array(dst.reg, dst.indirect, dst.base_offset, chan); -} + ireg = m_registers.find(rkey); + if (ireg != m_registers.end()) + return ireg->second; -PRegister -ValueFactory::local_register(const nir_register_src& src, int chan) -{ - return resolve_array(src.reg, src.indirect, src.base_offset, chan); + RegisterKey array_key(ssa.index, chan, vp_array); + sfn_log << SfnLog::reg << "search array with key" << array_key << "\n"; + auto iarray = m_registers.find(array_key); + if (iarray != m_registers.end()) + return iarray->second; + + std::cerr << "Didn't find source with key " << key << "\n"; + unreachable("Source values should always exist"); } PVirtualValue @@ -1040,11 +982,9 @@ ValueFactory::prepare_live_range_map() continue; if (key.value.pool == vp_array) { - if (key.value.chan == 0) { - auto array = static_cast(reg); - for (auto& a : *array) { - result.append_register(a); - } + auto array = static_cast(reg); + for (auto& a : *array) { + result.append_register(a); } } else { if (reg->chan() < 4) diff --git a/src/gallium/drivers/r600/sfn/sfn_valuefactory.h b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h index 5391af3..cc0671c 100644 --- a/src/gallium/drivers/r600/sfn/sfn_valuefactory.h +++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h @@ -178,6 +178,7 @@ struct register_key_hash { class ChannelCounts { public: void inc_count(int chan) { ++m_counts[chan]; } + void inc_count(int chan, int n) { m_counts[chan] += n; } int least_used(uint8_t mask) const { int least_used = 0; @@ -222,8 +223,9 @@ public: int new_register_index(); - bool allocate_registers(const exec_list *registers); + bool allocate_registers(const std::list& regs); PRegister allocate_pinned_register(int sel, int chan); + RegisterVec4 allocate_pinned_vec4(int sel, bool is_ssa); void inject_value(const nir_dest& dest, int chan, PVirtualValue value); @@ -292,11 +294,6 @@ public: private: PVirtualValue ssa_src(const nir_ssa_def& dest, int chan); - PRegister local_register(const nir_register_dest& dest, int chan); - PRegister local_register(const nir_register_src& dest, int chan); - PRegister - resolve_array(nir_register *reg, nir_src *indirect, int base_offset, int chan); - int m_next_register_index; int m_next_temp_channel{0}; diff --git a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp index 019938f..5664d88 100644 --- a/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_virtualvalues.cpp @@ -873,9 +873,10 @@ LocalArray::LocalArray(int base_sel, int nchannels, int size, int frac): sfn_log << SfnLog::reg << "Allocate array A" << base_sel << "(" << size << ", " << frac << ", " << nchannels << ")\n"; + auto pin = m_size > 1 ? pin_array : (nchannels > 1 ? pin_none : pin_free); for (int c = 0; c < nchannels; ++c) { for (unsigned i = 0; i < m_size; ++i) { - PRegister reg = new Register(base_sel + i, c + frac, pin_array); + PRegister reg = new Register(base_sel + i, c + frac, pin); m_values[m_size * c + i] = new LocalArrayValue(reg, *this); } } diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp index b63bc81..e362be2 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp @@ -1912,27 +1912,27 @@ ELSE ALU MOV A2[S37.x].x : I[0] {W} ALU MOV A2[S37.x].y : L[0x3dcccccd] {WL} ENDIF -ALU MOV S1025.x@group{s} : A2[0].x {W} -ALU MOV S1025.y@group{s} : A2[0].y {WL} -ALU MOV S1025.z@group{s} : A2[1].x {W} -ALU MOV S1025.w@group{s} : A2[1].y {WL} -ALU MOV S1027.x@group{s} : A2[2].x {W} -ALU MOV S1027.y@group{s} : A2[2].y {WL} -ALU MOV S1027.z@group{s} : A2[3].x {W} -ALU MOV S1027.w@group{s} : A2[3].y {WL} -ALU MOV S1029.x@group{s} : A2[0].z {W} -ALU MOV S1029.y@group{s} : A2[0].w {WL} -ALU MOV S1029.z@group{s} : A2[1].z {W} -ALU MOV S1029.w@group{s} : A2[1].w {WL} -ALU MOV S1031.x@group{s} : A2[2].z {W} -ALU MOV S1031.y@group{s} : A2[2].w {WL} -ALU MOV S1031.z@group{s} : A2[3].z {W} -ALU MOV S1031.w@group{s} : A2[3].w {WL} EXPORT_DONE POS 0 S19.xyzw -EXPORT PARAM 0 S1025.xyzw -EXPORT PARAM 1 S1027.xyzw -EXPORT PARAM 2 S1029.xyzw -EXPORT_DONE PARAM 3 S1031.xyzw +ALU MOV S46.x@group{s} : A2[0].x {W} +ALU MOV S46.y@group{s} : A2[0].y {W} +ALU MOV S46.z@group{s} : A2[1].x {W} +ALU MOV S46.w@group{s} : A2[1].y {WL} +EXPORT PARAM 0 S46.xyzw +ALU MOV S47.x@group{s} : A2[2].x {W} +ALU MOV S47.y@group{s} : A2[2].y {W} +ALU MOV S47.z@group{s} : A2[3].x {W} +ALU MOV S47.w@group{s} : A2[3].y {WL} +EXPORT PARAM 1 S47.xyzw +ALU MOV S48.x@group{s} : A2[0].z {W} +ALU MOV S48.y@group{s} : A2[0].w {W} +ALU MOV S48.z@group{s} : A2[1].z {W} +ALU MOV S48.w@group{s} : A2[1].w {WL} +EXPORT PARAM 2 S48.xyzw +ALU MOV S49.x@group{s} : A2[2].z {W} +ALU MOV S49.y@group{s} : A2[2].w {W} +ALU MOV S49.z@group{s} : A2[3].z {W} +ALU MOV S49.w@group{s} : A2[3].w {WL} +EXPORT_DONE PARAM 3 S49.xyzw )"; const char *shader_with_dest_array_opt_scheduled = @@ -2022,36 +2022,36 @@ ELSE ALU_GROUP_END ENDIF ALU_GROUP_BEGIN - ALU MOV S1025.x@chgr : A2[0].x {W} - ALU MOV S1025.y@chgr : A2[0].y {W} - ALU MOV S1025.z@chgr : A2[1].x {W} - ALU MOV S1025.w@chgr : A2[1].y {W} - ALU MOV S1027.x@group : A2[2].x {WL} + ALU MOV S46.x@chgr : A2[0].x {W} + ALU MOV S46.y@chgr : A2[0].y {W} + ALU MOV S46.z@chgr : A2[1].x {W} + ALU MOV S46.w@chgr : A2[1].y {W} + ALU MOV S47.x@group : A2[2].x {WL} ALU_GROUP_END ALU_GROUP_BEGIN - ALU MOV S1029.x@chgr : A2[0].z {W} - ALU MOV S1027.y@chgr : A2[2].y {W} - ALU MOV S1027.z@chgr : A2[3].x {W} - ALU MOV S1027.w@chgr : A2[3].y {W} - ALU MOV S1029.y@group : A2[0].w {WL} + ALU MOV S48.x@chgr : A2[0].z {W} + ALU MOV S47.y@chgr : A2[2].y {W} + ALU MOV S47.z@chgr : A2[3].x {W} + ALU MOV S47.w@chgr : A2[3].y {W} + ALU MOV S48.y@group : A2[0].w {WL} ALU_GROUP_END ALU_GROUP_BEGIN - ALU MOV S1031.x@chgr : A2[2].z {W} - ALU MOV S1031.y@chgr : A2[2].w {W} - ALU MOV S1029.z@chgr : A2[1].z {W} - ALU MOV S1029.w@chgr : A2[1].w {W} - ALU MOV S1031.z@group : A2[3].z {WL} + ALU MOV S49.x@chgr : A2[2].z {W} + ALU MOV S49.y@chgr : A2[2].w {W} + ALU MOV S48.z@chgr : A2[1].z {W} + ALU MOV S48.w@chgr : A2[1].w {W} + ALU MOV S49.z@group : A2[3].z {WL} ALU_GROUP_END ALU_GROUP_BEGIN - ALU MOV S1031.w@chgr : A2[3].w {WL} + ALU MOV S49.w@chgr : A2[3].w {WL} ALU_GROUP_END BLOCK_END BLOCK_START EXPORT_DONE POS 0 S19.xyzw -EXPORT PARAM 0 S1025.xyzw -EXPORT PARAM 1 S1027.xyzw -EXPORT PARAM 2 S1029.xyzw -EXPORT_DONE PARAM 3 S1031.xyzw +EXPORT PARAM 0 S46.xyzw +EXPORT PARAM 1 S47.xyzw +EXPORT PARAM 2 S48.xyzw +EXPORT_DONE PARAM 3 S49.xyzw BLOCK END\n )"; -- 2.7.4