From 8df72a93b6e8ed45b681232b36a2455e23656ac8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=EB=B0=95=EC=A2=85=ED=98=84/=EB=8F=99=EC=9E=91=EC=A0=9C?= =?utf8?q?=EC=96=B4Lab=28SR=29/Staff=20Engineer/=EC=82=BC=EC=84=B1?= =?utf8?q?=EC=A0=84=EC=9E=90?= Date: Mon, 27 Aug 2018 11:03:27 +0900 Subject: [PATCH] [enco] Support Shuffle code generation (#1182) This commit implement Shuffle code generator which emits nested loops that corresponds to a shuffle instruction. Signed-off-by: Jonghyun Park --- contrib/enco/core/src/CppCode.cpp | 55 ++++++- contrib/enco/core/src/CppGen/Host.cpp | 289 ++++++++++++++++++++++++++++++++++ contrib/enco/core/src/CppGen/Host.h | 32 ++++ 3 files changed, 375 insertions(+), 1 deletion(-) create mode 100644 contrib/enco/core/src/CppGen/Host.cpp create mode 100644 contrib/enco/core/src/CppGen/Host.h diff --git a/contrib/enco/core/src/CppCode.cpp b/contrib/enco/core/src/CppCode.cpp index 0b689e8..dd38763 100644 --- a/contrib/enco/core/src/CppCode.cpp +++ b/contrib/enco/core/src/CppCode.cpp @@ -1,6 +1,9 @@ #include "CppCode.h" #include "CppGen/Global.h" +#include "CppGen/MemoryContext.h" + +#include "CppGen/Host.h" #include "Dims.h" @@ -10,6 +13,16 @@ #include #include +namespace +{ + +struct InvokeFunction +{ + pp::LinearDocument body; +}; + +} // namespace + namespace enco { @@ -18,6 +31,46 @@ void CppCode::dump(std::ostream &os) const auto m = _code->module(); Global global; + InvokeFunction invoke; + + MemoryContext mem; + + // Set dedicated memory region for network inputs + for (uint32_t n = 0; n < m->input()->size(); ++n) + { + mem.base(m->input()->at(n)->bag(), pp::fmt("net->inputs[", n, "].ptr")); + mem.size(m->input()->at(n)->bag(), pp::fmt("net->inputs[", n, "].len")); + } + + // Set dedicated memory region for network outputs + for (uint32_t n = 0; n < m->output()->size(); ++n) + { + mem.base(m->output()->at(n)->bag(), pp::fmt("net->outputs[", n, "].ptr")); + mem.size(m->output()->at(n)->bag(), pp::fmt("net->outputs[", n, "].len")); + } + + // Create Code Block Builder + HostBlockCompiler host_compiler{mem}; + + for (auto blk = m->block()->head(); blk; blk = blk->next()) + { + invoke.body.append("{"); + invoke.body.indent(); + + if (auto binder = _code->ann()->find(blk)) + { + throw std::runtime_error{"NYI; Android NN codegen is not supported, yet"}; + } + else + { + // Generate code on-the-fly for Android NN-incompatible blocks + auto lines = host_compiler.compile(blk); + invoke.body.append(*lines); + } + + invoke.body.unindent(); + invoke.body.append("}"); + } // // Generate full C++ source code with code snippet @@ -216,7 +269,7 @@ void CppCode::dump(std::ostream &os) const source.append("void ", name, "_invoke(", name, " *net) {"); source.indent(); - source.append("assert(\"NYI\");"); + source.append(invoke.body); source.unindent(); source.append("}"); diff --git a/contrib/enco/core/src/CppGen/Host.cpp b/contrib/enco/core/src/CppGen/Host.cpp new file mode 100644 index 0000000..6115051 --- /dev/null +++ b/contrib/enco/core/src/CppGen/Host.cpp @@ -0,0 +1,289 @@ +#include "Host.h" + +#include + +#include + +#include +#include + +namespace +{ + +/** + * @brief Data transfer between flat arrays + * + * Transfer(from, into) denotes the following C code: + * dst[into] = src[from]; + */ +class Transfer +{ +public: + Transfer(uint32_t from, uint32_t into) : _from{from}, _into{into} + { + // DO NOTHING + } + +public: + uint32_t from(void) const { return _from; } + uint32_t into(void) const { return _into; } + +private: + uint32_t _from; + uint32_t _into; +}; + +using TransferSequence = std::vector; + +/** + * @brief Convert Shuffle instruction as a sequence of data transfer + */ +TransferSequence as_transfer_sequence(const coco::Shuffle *shuffle) +{ + TransferSequence seq; + + for (uint32_t n = 0; n < shuffle->into()->size(); ++n) + { + seq.emplace_back(shuffle->at(n).value(), n); + } + + return seq; +} + +/** + * Given a sequence of N data transfers, + * find_loop tries to compute count, src_step, dst_step that satisfies + * the following properties: + * + * First, N should be a multiple of count. + * Below we refer to that multiplier as 'window' (= N / count) + * + * Second, + * for all n in [0, count), + * for all k in [0, window), + * from[n * window + k] == from[k] + src_step, and + * into[n * window + k] == into[k] + dst_step + */ +bool find_loop(TransferSequence::const_iterator beg, TransferSequence::const_iterator end, + uint32_t *p_count, uint32_t *p_src_step, uint32_t *p_dst_step) +{ + assert(p_count != nullptr); + assert(p_src_step != nullptr); + assert(p_dst_step != nullptr); + + const uint32_t size = end - beg; + + for (uint32_t window = 1; window <= size; ++window) + { + if (size % window != 0) + { + continue; + } + + auto src_step_at = [&beg, window](uint32_t n) { + return (beg + n)->from() - (beg + n - window)->from(); + }; + + auto dst_step_at = [&beg, window](uint32_t n) { + return (beg + n)->into() - (beg + n - window)->into(); + }; + + const uint32_t count = size / window; + const uint32_t src_step = src_step_at(window); + const uint32_t dst_step = dst_step_at(window); + + bool consistent = true; + + for (uint32_t n = window + 1; n < size; ++n) + { + if ((src_step_at(n) != src_step) || (dst_step_at(n) != dst_step)) + { + consistent = false; + break; + } + } + + if (consistent) + { + *p_count = count; + *p_src_step = src_step; + *p_dst_step = dst_step; + return true; + } + } + + return false; +} + +/** + * @brief Single transfer loop (a triple of count, source step, detination step) + */ +class TransferLoop +{ +public: + class Step + { + public: + Step(uint32_t src, uint32_t dst) : _src{src}, _dst{dst} + { + // DO NOTHING + } + + public: + uint32_t src(void) const { return _src; } + uint32_t dst(void) const { return _dst; } + + private: + uint32_t _src; + uint32_t _dst; + }; + +public: + TransferLoop(uint32_t count, uint32_t src_step, uint32_t dst_step) + : _count{count}, _step{src_step, dst_step} + { + // DO NOTHING + } + +public: + uint32_t count(void) const { return _count; } + const Step &step(void) const { return _step; } + +private: + uint32_t _count; + Step _step; +}; + +/** + * @brief Nested transfer loops + */ +using TransferNest = std::vector; + +/** + * @brief Construct nested transfer loop-nest that correponds to a given Shuffle instruction + */ +TransferNest as_nest(const coco::Shuffle *shuffle) +{ + TransferNest nest; + TransferSequence seq = as_transfer_sequence(shuffle); + + auto beg = seq.begin(); + auto end = seq.end(); + + uint32_t window = end - beg; + uint32_t count = 0; + uint32_t src_step = 0; + uint32_t dst_step = 0; + + while ((window > 1) && find_loop(beg, end, &count, &src_step, &dst_step)) + { + assert(window % count == 0); + + window /= count; + end = beg + window; + + nest.emplace_back(count, src_step, dst_step); + } + + return nest; +}; + +uint32_t loop_count(const TransferNest &nest) +{ + uint32_t count = 1; + + for (const auto &loop : nest) + { + count *= loop.count(); + } + + return count; +}; + +class InstrPrinter : public coco::Instr::DefaultVisitor +{ +public: + InstrPrinter(const enco::MemoryContext &mem) : _mem(mem) + { + // DO NOTHING + } + +private: + pp::LinearDocument visit(const coco::Shuffle *shuffle) override + { + auto from = shuffle->from(); + auto into = shuffle->into(); + + // + // Analyze 'Shuffle' pattern, and convert it as nested loops + // + auto nest = as_nest(shuffle); + assert(shuffle->into()->size() % loop_count(nest) == 0); + uint32_t window = shuffle->into()->size() / loop_count(nest); + + // + // Generate loop body + // + pp::EnclosedDocument loop_body; + + auto var_at = [](uint32_t lv) { return pp::fmt("_", lv); }; + + for (uint32_t lv = 0; lv < nest.size(); ++lv) + { + auto var = var_at(lv); + + loop_body.front().append("for (uint32_t ", var, " = 0; ", var, " < ", nest.at(lv).count(), + "; ++", var, ") {"); + loop_body.front().indent(); + + loop_body.back().append("}"); + loop_body.back().indent(); + } + + std::string src_index = "0"; + std::string dst_index = "0"; + + for (uint32_t lv = 0; lv < nest.size(); ++lv) + { + src_index += pp::fmt(" + ", nest.at(lv).step().src(), " * ", var_at(lv)); + dst_index += pp::fmt(" + ", nest.at(lv).step().dst(), " * ", var_at(lv)); + } + + for (uint32_t n = 0; n < window; ++n) + { + const auto src_base = pp::fmt("reinterpret_cast(", _mem.base(from), ")"); + const auto dst_base = pp::fmt("reinterpret_cast(", _mem.base(into), ")"); + + loop_body.front().append(dst_base, "[", dst_index, " + ", n, "] = ", src_base, "[", src_index, + " + ", shuffle->at(n).value(), "];"); + } + + pp::LinearDocument res; + res.append(loop_body); + return res; + } + +private: + const enco::MemoryContext &_mem; +}; + +} // namespace + +namespace enco +{ + +std::unique_ptr HostBlockCompiler::compile(const coco::Block *blk) const +{ + InstrPrinter prn{_mem}; + + auto res = nncc::foundation::make_unique(); + + for (auto ins = blk->instr()->head(); ins; ins = ins->next()) + { + res->append(ins->accept(prn)); + } + + return std::move(res); +} + +} // namespace enco diff --git a/contrib/enco/core/src/CppGen/Host.h b/contrib/enco/core/src/CppGen/Host.h new file mode 100644 index 0000000..826f4bf --- /dev/null +++ b/contrib/enco/core/src/CppGen/Host.h @@ -0,0 +1,32 @@ +#ifndef __ENCO_CPP_GEN_HOST_H__ +#define __ENCO_CPP_GEN_HOST_H__ + +#include "CppGen/MemoryContext.h" + +#include +#include + +namespace enco +{ + +/*** + * @brief Generate C++ code that does not depend on Anroid NN API + */ +class HostBlockCompiler +{ +public: + HostBlockCompiler(const enco::MemoryContext &mem) : _mem(mem) + { + // DO NOTHING + } + +public: + std::unique_ptr compile(const coco::Block *blk) const; + +private: + const enco::MemoryContext &_mem; +}; + +} // namespace enco + +#endif // __ENCO_CPP_GEN_HOST_H__ -- 2.7.4