From 187600daef6cf89f9ab1d8a8a44316b8536475c1 Mon Sep 17 00:00:00 2001 From: Hua Jiang Date: Thu, 29 Aug 2019 10:48:42 -0700 Subject: [PATCH] [VTA] Fix RewriteForceSerial Function logic issue. (#3854) Issue: RewriteForceSerial is a debug function to force instructions to be serialize instead of parrallel running, by doing so we can isolate some parallel problem or do performance compare between parallel and serialize. But this function have some problem, once get enabled by set debug flag, vta would stuck when running on pynq board. Analysis: once enable RewriteForceSerial, the dependency logic is different with default one, but we still use same logic to generate FINISH and other logic, this would cause dead lock. Solution: give a different dependency settings when enable RewriteForceSerial. --- vta/src/runtime.cc | 50 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/vta/src/runtime.cc b/vta/src/runtime.cc index d48cb3a..278addc 100644 --- a/vta/src/runtime.cc +++ b/vta/src/runtime.cc @@ -601,9 +601,11 @@ class InsnQueue : public BaseQueue { void RewriteForceSerial() { int insn_count = count(); VTAMemInsn* mem_ptr = reinterpret_cast(data()); + VTAMemInsn* mem_last_store_ptr = nullptr; + VTAMemInsn* mem_last_ptr = nullptr; for (int i = 1; i < insn_count; ++i) { - PipelineStage prev = GetPipelineStage(mem_ptr + i - 1); - PipelineStage now = GetPipelineStage(mem_ptr + i); + PipelineStage prev = GetPipelineStageAll(mem_ptr + i - 1); + PipelineStage now = GetPipelineStageAll(mem_ptr + i); if (prev == kLoadStage && now == kComputeStage) { mem_ptr[i - 1].push_prev_dep = false; mem_ptr[i - 1].push_next_dep = true; @@ -630,7 +632,30 @@ class InsnQueue : public BaseQueue { mem_ptr[i].pop_prev_dep = false; mem_ptr[i].pop_next_dep = false; } + if (now == kStoreStage) { + mem_last_store_ptr = &mem_ptr[i]; + } + mem_last_ptr = &mem_ptr[i]; + } + // set dependency to make sure all core instruction get excuted + // before last FINISH instruction + if (mem_last_store_ptr && mem_last_ptr == mem_last_store_ptr) { + mem_last_store_ptr->push_prev_dep = true; + if (!pending_pop_next_[kComputeStage]) { + DepPop(kStoreStage, kComputeStage); + } + CommitPendingPop(kComputeStage); + } else { + pending_pop_next_[kComputeStage] = 0; + } + DepPush(kComputeStage, kLoadStage); + DepPop(kLoadStage, kComputeStage); + if (!pending_pop_next_[kLoadStage]) { + DepPop(kComputeStage, kLoadStage); } + CommitPendingPop(kLoadStage); + DepPush(kLoadStage, kComputeStage); + CommitPendingPop(kComputeStage); } // Helper function: Get Opcode string const char* getOpcodeString(int opcode, bool use_imm) { @@ -912,6 +937,14 @@ class InsnQueue : public BaseQueue { LOG(FATAL) << "not reached"; return kNoneStage; } + + // Get stage of memory and computation + static PipelineStage GetPipelineStageAll(VTAMemInsn* insn) { + PipelineStage stage = GetPipelineStage(insn); + if (stage != kNoneStage) return stage; + return GetMemPipelineStage(insn->memory_type); + } + // Push no-op void PushNoop(int stage, bool push_prev_dep, bool push_next_dep, @@ -1069,13 +1102,14 @@ class CommandQueue { // Insert dependences to force serialization if (debug_flag_ & VTA_DEBUG_FORCE_SERIAL) { insn_queue_.RewriteForceSerial(); + } else { + // This will issue finish after last store finishes + insn_queue_.DepPush(kStoreStage, kComputeStage); + insn_queue_.DepPush(kLoadStage, kComputeStage); + insn_queue_.DepPop(kStoreStage, kComputeStage); + insn_queue_.DepPop(kLoadStage, kComputeStage); + insn_queue_.CommitPendingPop(kComputeStage); } - // This will issue finish after last store finishes - insn_queue_.DepPush(kStoreStage, kComputeStage); - insn_queue_.DepPush(kLoadStage, kComputeStage); - insn_queue_.DepPop(kStoreStage, kComputeStage); - insn_queue_.DepPop(kLoadStage, kComputeStage); - insn_queue_.CommitPendingPop(kComputeStage); // NOTE: FINISH cannot contain pop VTAGemInsn* insn = insn_queue_.CreateGemInsn(); insn->opcode = VTA_OPCODE_FINISH; -- 2.7.4