Revision 332390 introduced a FetchStage class in llvm-mca.
By design, FetchStage owns all the instructions in-flight in the OoO Backend.
Before this change, new instructions were added to a DenseMap indexed by
instruction id. The problem with using a DenseMap is that elements are not
ordered by key. This was causing a massive slowdown in method
FetchStage::postExecute(), which searches for retired instructions that can be
deleted.
This patch replaces the DenseMap with a std::map ordered by instruction index.
At the end of every cycle, we search for the first instruction which is not
marked as "retired", and we remove all the instructions before it.
This works well because instructions are retired in order.
Before this patch, a debug build of llvm-mca (on my Ryzen Linux machine) took
~8.0 seconds to simulate 3000 iterations of an x86 dot-product (a `vmulps,
vpermilps, vaddps, vpermilps, vaddps` sequence). With this patch, it now takes
~0.8 seconds to run all the 3000 iterations.
llvm-svn: 332461
}
void FetchStage::postExecute(const InstRef &IR) {
- // Reclaim instructions that have been retired.
- llvm::remove_if(Instructions,
- [](InstMapPr &Pr) { return Pr.getSecond()->isRetired(); });
+ // Find the first instruction which hasn't been retired.
+ const InstMap::iterator It =
+ llvm::find_if(Instructions, [](const InstMap::value_type &KeyValuePair) {
+ return !KeyValuePair.second->isRetired();
+ });
+ if (It != Instructions.begin())
+ Instructions.erase(Instructions.begin(), It);
SM.updateNext();
}
#include "Instruction.h"
#include "SourceMgr.h"
#include "Stage.h"
-#include "llvm/ADT/DenseMap.h"
+#include <map>
namespace mca {
class FetchStage : public Stage {
- using InstMap = llvm::DenseMap<unsigned, std::unique_ptr<Instruction>>;
- using InstMapPr =
- llvm::detail::DenseMapPair<unsigned, std::unique_ptr<Instruction>>;
+ using InstMap = std::map<unsigned, std::unique_ptr<Instruction>>;
InstMap Instructions;
InstrBuilder &IB;
SourceMgr &SM;