From e6f74384b1fb963ea5e65cc7ee3e63a63ddf08f5 Mon Sep 17 00:00:00 2001 From: Marek Olsak Date: Tue, 25 Jul 2017 20:36:58 +0000 Subject: [PATCH] AMDGPU/SI: Force exports at the end for SI scheduler Patch by: Axel Davy Differential Revision: https://reviews.llvm.org/D34965 llvm-svn: 309027 --- llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp | 57 +++++++++++++++++++++++++++ llvm/lib/Target/AMDGPU/SIMachineScheduler.h | 3 ++ 2 files changed, 60 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp index 34886c4..de3ff62 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -1130,6 +1130,62 @@ void SIScheduleBlockCreator::regroupNoUserInstructions() { } } +void SIScheduleBlockCreator::colorExports() { + unsigned ExportColor = NextNonReservedID++; + SmallVector ExpGroup; + + // Put all exports together in a block. + // The block will naturally end up being scheduled last, + // thus putting exports at the end of the schedule, which + // is better for performance. + // However we must ensure, for safety, the exports can be put + // together in the same block without any other instruction. + // This could happen, for example, when scheduling after regalloc + // if reloading a spilled register from memory using the same + // register as the one used in a previous export. + // If that happens, do not regroup the exports. + for (unsigned SUNum : DAG->TopDownIndex2SU) { + const SUnit &SU = DAG->SUnits[SUNum]; + if (SIInstrInfo::isEXP(*SU.getInstr())) { + // Check the EXP can be added to the group safely, + // i.e. without needing any other instruction. + // The EXP is allowed to depend on other EXP + // (they will be in the same group). + for (unsigned j : ExpGroup) { + bool HasSubGraph; + std::vector SubGraph; + // By construction (topological order), if SU and + // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessarily + // in the parent graph of SU. 
+#ifndef NDEBUG + SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j], + HasSubGraph); + assert(!HasSubGraph); +#endif + SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU, + HasSubGraph); + if (!HasSubGraph) + continue; // No dependencies between each other + + // SubGraph contains all the instructions required + // between EXP SUnits[j] and EXP SU. + for (unsigned k : SubGraph) { + if (!SIInstrInfo::isEXP(*DAG->SUnits[k].getInstr())) + // Other instructions than EXP would be required in the group. + // Abort the grouping. + return; + } + } + + ExpGroup.push_back(SUNum); + } + } + + // The group can be formed. Give the color. + for (unsigned j : ExpGroup) + CurrentColoring[j] = ExportColor; +} + void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) { unsigned DAGSize = DAG->SUnits.size(); std::map RealID; @@ -1159,6 +1215,7 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria regroupNoUserInstructions(); colorMergeConstantLoadsNextGroup(); colorMergeIfPossibleNextGroupOnlyForReserved(); + colorExports(); // Put SUs of same color into same block Node2CurrentBlock.resize(DAGSize, -1); diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h index 122d0f6..d824e38 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h @@ -302,6 +302,9 @@ private: // (we'd want these groups be at the end). void regroupNoUserInstructions(); + // Give export instructions a common color drawn from NextNonReservedID + void colorExports(); + void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant); void topologicalSort(); -- 2.7.4