From 4d5859a524b18627320009995154728349e5f3bc Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Tue, 14 Feb 2023 17:35:33 +0100 Subject: [PATCH] r600/sfn: Start a new ALU CF on index use, not on index emission With that we can use the two IDX registers in parallel and might save some CF instructions. Signed-off-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/sfn/sfn_scheduler.cpp | 55 ++++++++++++++++++++-- .../sfn/tests/sfn_split_address_loads_test.cpp | 2 - 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp index 2026dc8..7eb2dbe 100644 --- a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp @@ -208,6 +208,10 @@ private: int m_lds_addr_count{0}; int m_alu_groups_scheduled{0}; r600_chip_class m_chip_class; + bool m_idx0_loading{false}; + bool m_idx1_loading{false}; + bool m_idx0_pending{false}; + bool m_idx1_pending{false}; }; Shader * @@ -574,15 +578,36 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks) group->set_scheduled(); group->fix_last_flag(); group->set_nesting_depth(m_current_block->nesting_depth()); + + auto [addr, is_index] = group->addr(); + if (is_index) { + if (addr->sel() == AddressRegister::idx0 && m_idx0_pending) { + assert(!group->has_lds_group_start()); + assert(m_current_block->expected_ar_uses() == 0); + start_new_block(out_blocks, Block::alu); + } + if (addr->sel() == AddressRegister::idx1 && m_idx1_pending) { + assert(!group->has_lds_group_start()); + assert(m_current_block->expected_ar_uses() == 0); + start_new_block(out_blocks, Block::alu); + } + } + m_current_block->push_back(group); + m_idx0_pending |= m_idx0_loading; + m_idx0_loading = false; + + m_idx1_pending |= m_idx1_loading; + m_idx1_loading = false; + if (group->has_lds_group_start()) m_current_block->lds_group_start(*group->begin()); if (group->has_lds_group_end()) m_current_block->lds_group_end(); - if 
(group->index_mode_load() || group->has_kill_op()) { + if (group->has_kill_op()) { assert(!group->has_lds_group_start()); assert(m_current_block->expected_ar_uses() == 0); start_new_block(out_blocks, Block::alu); @@ -652,6 +677,7 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty m_current_block = new Block(m_current_block->nesting_depth(), m_current_block->id()); m_current_block->set_instr_flag(Instr::force_cf); + m_idx0_pending = m_idx1_pending = false; } m_current_block->set_type(type); } @@ -697,11 +723,30 @@ BlockScheduler::schedule_alu_to_group_vec(AluGroup *group) auto addr = std::get<0>((*old_i)->indirect_addr()); bool has_indirect_reg_load = addr != nullptr && addr->has_flag(Register::addr_or_idx); - if (std::get<0>((*old_i)->indirect_addr()) || - (!(*old_i)->has_alu_flag(alu_is_lds) && - ((*old_i)->opcode() == op1_set_cf_idx0 || - (*old_i)->opcode() == op1_set_cf_idx1))) + bool is_idx_load_on_eg = false; + if (!(*old_i)->has_alu_flag(alu_is_lds)) { + bool load_idx0_eg = (*old_i)->opcode() == op1_set_cf_idx0; + bool load_idx0_ca = ((*old_i)->opcode() == op1_mova_int && + (*old_i)->dest()->sel() == AddressRegister::idx0); + + bool load_idx1_eg = (*old_i)->opcode() == op1_set_cf_idx1; + bool load_idx1_ca = ((*old_i)->opcode() == op1_mova_int && + (*old_i)->dest()->sel() == AddressRegister::idx1); + + is_idx_load_on_eg = load_idx0_eg || load_idx1_eg; + + bool load_idx0 = load_idx0_eg || load_idx0_ca; + bool load_idx1 = load_idx1_eg || load_idx1_ca; + + + assert(!m_idx0_pending || !load_idx0); + assert(!m_idx1_pending || !load_idx1); + + m_idx0_loading |= load_idx0; + m_idx1_loading |= load_idx1; + } + if (has_indirect_reg_load || is_idx_load_on_eg) m_current_block->dec_expected_ar_uses(); alu_vec_ready.erase(old_i); diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp index 8177b4c..819e614 100644 --- 
a/src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp @@ -581,8 +581,6 @@ ALU_GROUP_END ALU_GROUP_BEGIN ALU MOVA_INT IDX0 : S3.x@free {L} ALU_GROUP_END -BLOCK_END -BLOCK_START ALU_GROUP_BEGIN ALU MOVA_INT IDX1 : S3.y@free {L} ALU_GROUP_END -- 2.7.4