src/schedule/*.cc
)
-file(GLOB_RECURSE RELAY_SRCS
- src/relay/*.cc
+file(GLOB_RECURSE RELAY_OP_SRCS
+ src/relay/op/*.cc
)
-list(APPEND COMPILER_SRCS ${RELAY_SRCS})
+file(GLOB_RECURSE RELAY_PASS_SRCS
+ src/relay/pass/*.cc
+ )
+file(GLOB RELAY_BACKEND_SRCS
+ src/relay/backend/*.cc
+ src/relay/backend/vm/*.cc
+ )
+file(GLOB_RECURSE RELAY_IR_SRCS
+ src/relay/ir/*.cc
+ )
+file(GLOB_RECURSE RELAY_QNN_SRCS
+ src/relay/qnn/*.cc
+)
+list(APPEND COMPILER_SRCS ${RELAY_OP_SRCS})
+list(APPEND COMPILER_SRCS ${RELAY_PASS_SRCS})
+list(APPEND COMPILER_SRCS ${RELAY_BACKEND_SRCS})
+list(APPEND COMPILER_SRCS ${RELAY_IR_SRCS})
+list(APPEND COMPILER_SRCS ${RELAY_QNN_SRCS})
+
+if(USE_VM_PROFILER)
+ message(STATUS "Build compiler with Relay VM profiler support...")
+ file(GLOB BACKEND_VM_PROFILER_SRCS src/relay/backend/vm/profiler/*.cc)
+ list(APPEND COMPILER_SRCS ${BACKEND_VM_PROFILER_SRCS})
+endif(USE_VM_PROFILER)
file(GLOB DATATYPE_SRCS src/codegen/datatype/*.cc)
list(APPEND COMPILER_SRCS ${DATATYPE_SRCS})
endif(USE_GRAPH_RUNTIME_DEBUG)
endif(USE_GRAPH_RUNTIME)
+if(USE_VM_PROFILER)
+ message(STATUS "Build with Relay VM profiler support...")
+ file(GLOB RUNTIME_VM_PROFILER_SRCS src/runtime/vm/profiler/*.cc)
+ list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS})
+endif(USE_VM_PROFILER)
+
# Module rules
include(cmake/modules/VTA.cmake)
include(cmake/modules/CUDA.cmake)
echo set\\(USE_GRAPH_RUNTIME ON\\) >> config.cmake
echo set\\(USE_STACKVM_RUNTIME ON\\) >> config.cmake
echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake
+ echo set\\(USE_VM_PROFILER ON\\) >> config.cmake
echo set\\(USE_ANTLR ON\\) >> config.cmake
echo set\\(USE_BLAS openblas\\) >> config.cmake
echo set\\(CMAKE_CXX_COMPILER g++\\) >> config.cmake
echo set\\(USE_VULKAN ON\\) >> config.cmake
echo set\\(USE_MICRO ON\\) >> config.cmake
echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake
+ echo set\\(USE_VM_PROFILER ON\\) >> config.cmake
echo set\\(CMAKE_CXX_COMPILER clang-7\\) >> config.cmake
echo set\\(CMAKE_CXX_FLAGS -Werror\\) >> config.cmake
"""
echo set\\(USE_SORT ON\\) >> config.cmake
echo set\\(USE_MICRO ON\\) >> config.cmake
echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake
+ echo set\\(USE_VM_PROFILER ON\\) >> config.cmake
echo set\\(USE_LLVM llvm-config-8\\) >> config.cmake
echo set\\(USE_NNPACK ON\\) >> config.cmake
echo set\\(NNPACK_PATH /NNPACK/build/\\) >> config.cmake
echo set\\(USE_SORT ON\\) >> config.cmake
echo set\\(USE_RPC ON\\) >> config.cmake
echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake
+ echo set\\(USE_VM_PROFILER ON\\) >> config.cmake
echo set\\(USE_LLVM llvm-config-4.0\\) >> config.cmake
echo set\\(CMAKE_CXX_COMPILER g++\\) >> config.cmake
echo set\\(CMAKE_CXX_FLAGS -Werror\\) >> config.cmake
# Whether enable additional graph debug functions
set(USE_GRAPH_RUNTIME_DEBUG OFF)
+# Whether enable additional vm profiler functions
+set(USE_VM_PROFILER OFF)
+
# Whether build with LLVM support
# Requires LLVM version >= 4.0
#
*/
class VirtualMachine : public runtime::ModuleNode {
public:
- PackedFunc GetFunction(const std::string& name,
- const std::shared_ptr<ModuleNode>& sptr_to_self) final;
+ /*!
+ * \brief Get a PackedFunc from module.
+ *
+ * The PackedFunc may not be fully initialized,
+ * there might still be first time running overhead when
+ * executing the function on certain devices.
+ * For benchmarking, use prepare to eliminate such overhead.
+ *
+ * \param name the name of the function.
+ * \param sptr_to_self The shared_ptr that points to this module node.
+ *
+ * \return PackedFunc(nullptr) when it is not available.
+ *
+ * \note The function will always remain valid.
+ * If the function needs resource from the module(e.g. late linking),
+ * it should capture sptr_to_self.
+ */
+ virtual PackedFunc GetFunction(const std::string& name,
+ const std::shared_ptr<ModuleNode>& sptr_to_self);
+
+ /*!
+ * \brief Invoke a PackedFunction
+ *
+ * \param packed_index The offset of the PackedFunction in all functions.
+ * \param func The PackedFunction to be invoked.
+ * \param arg_count The number of arguments to the PackedFunction.
+ * \param output_size The number of outputs of the PackedFunction.
+ * \param args Arguments to the PackedFunction.
+ *
+ * \note The return value will be stored in the last output_size slots of args.
+ */
+ virtual void InvokePacked(Index packed_index, const PackedFunc& func, Index arg_count,
+ Index output_size, const std::vector<Object>& args);
+
+ virtual ~VirtualMachine() {}
const char* type_key() const final {
return "VirtualMachine";
*/
void RunLoop();
+ /*! \brief Get device context for params.
+ */
+ TVMContext GetParamsContext() const;
+
/*!
* \brief Load parameters from the parameter bytearray.
* \param params The binary file that contains parameters.
*/
void InvokeGlobal(const VMFunction& func, const std::vector<Object>& args);
- /*! \brief Get device context for params.
- */
- TVMContext GetParamsContext() const;
/*! \brief The parameter name to data mapping. */
std::unordered_map<std::string, Object> params_;
from . import param_dict
from . import feature
from .backend import vm
+from .backend import profiler_vm
from .backend import serializer
from .backend import deserializer
from .backend import vmobj
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=no-else-return, unidiomatic-typecheck, undefined-variable, invalid-name
+"""
+The Relay Virtual Machine profiler.
+
+Provides extra APIs for profiling vm execution.
+"""
+import tvm
+from . import vm, _vm
+
+def _update_target(target):
+ target = target if target else tvm.target.current_target()
+ if target is None:
+ raise ValueError("Target is not set in env or passed as argument.")
+
+ tgts = {}
+ if isinstance(target, (str, tvm.target.Target)):
+ dev_type = tvm.expr.IntImm("int32", tvm.nd.context(str(target)).device_type)
+ tgts[dev_type] = tvm.target.create(target)
+ elif isinstance(target, dict):
+ for dev, tgt in target.items():
+ dev_type = tvm.expr.IntImm("int32", tvm.nd.context(dev).device_type)
+ tgts[dev_type] = tvm.target.create(tgt)
+ else:
+ raise TypeError("target is expected to be str, tvm.target.Target, " +
+ "or dict of str to str/tvm.target.Target, but received " +
+ "{}".format(type(target)))
+ return tgts
+
+class VMCompilerProfiler(vm.VMCompiler):
+ """Build Relay module to run on VM runtime."""
+ def __init__(self):
+ super().__init__()
+ self.mod = _vm._VMCompilerProfiler()
+ self._compile = self.mod["compile"]
+ self._get_vm = self.mod["get_vm"]
+
+ def compile(self, mod, target=None, target_host=None):
+ """
+ Parameters
+ ----------
+ mod : relay.Module
+ The Relay module to build.
+
+ target : str, :any:`tvm.target.Target`, or dict of str(i.e.
+ device/context name) to str/tvm.target.Target, optional
+ For heterogeneous compilation, it is a dictionary indicating context
+ to target mapping. For homogeneous compilation, it is a build target.
+
+ target_host : str or :any:`tvm.target.Target`, optional
+ Host compilation target, if target is device.
+ When TVM compiles device specific program such as CUDA,
+ we also need host(CPU) side code to interact with the driver
+ to setup the dimensions and parameters correctly.
+ target_host is used to specify the host side codegen target.
+ By default, llvm is used if it is enabled,
+ otherwise a stackvm interpreter is used.
+
+ Returns
+ -------
+ vm : VirtualMachineProfiler
+ The profile VM runtime.
+ """
+ target = _update_target(target)
+ self._compile(mod, target, target_host)
+ return VirtualMachineProfiler(self._get_vm())
+
+class VirtualMachineProfiler(vm.VirtualMachine):
+ """Relay profile VM runtime."""
+ def __init__(self, mod):
+ super().__init__(mod)
+ self._get_stat = self.mod["get_stat"]
+
+ def get_stat(self):
+ return self._get_stat()
#include <tvm/relay/transform.h>
#include <tvm/runtime/vm.h>
#include <iostream>
+#include <memory>
+#include <set>
+#include <string>
+#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "../../../runtime/vm/naive_allocator.h"
#include "../../backend/compile_engine.h"
#include "../../pass/pass_util.h"
+#include "compiler.h"
namespace tvm {
namespace relay {
// (@jroesch): VM passes, eventually declare as passes.
bool IsClosure(const Function& func);
-template <typename T, typename U>
-using NodeMap = std::unordered_map<T, U, NodeHash, NodeEqual>;
-using TagMap = NodeMap<tvm::relay::Constructor, Index>;
-using TagNameMap = std::unordered_map<size_t, tvm::relay::Constructor>;
-using GlobalMap = NodeMap<GlobalVar, Index>;
-using ConstMap = NodeMap<Constant, Index>;
-using ConstTensorShapeMap = NodeMap<TensorType, std::pair<Index, NDArray>>;
-using TargetsMap = Map<tvm::Integer, tvm::Target>;
-
-struct VMCompilerContext {
- // The module context for the compilation
- Module module;
- // Error reporter
- ErrorReporter err_reporter;
- // Map from a unique integer to ADT constructor tag
- TagNameMap tag_index_map;
- // Map from ADT constructor tag to a unique integer
- TagMap tag_map;
- // Map from global var to a unique integer
- GlobalMap global_map;
- // Map from Const object to its index in const pool
- ConstMap const_map;
- // Map from Const tensor shape to its index in const pool
- ConstTensorShapeMap const_tensor_shape_map;
- // List of lowered functions
- std::vector<LoweredFunc> lowered_funcs;
- // The functions that have been lowered.
- std::unordered_map<LoweredFunc, size_t, NodeHash, NodeEqual> seen_funcs;
-};
-
// Compute the constant pool, i.e a mapping from Constant node to constant index.
struct ConstantPool : ExprVisitor {
std::set<GlobalVar> visited;
};
-class VMCompiler : public runtime::ModuleNode {
- public:
- PackedFunc GetFunction(const std::string& name,
- const std::shared_ptr<ModuleNode>& sptr_to_self) final {
- if (name == "compile") {
- return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
- CHECK_EQ(args.num_args, 3);
- this->Compile(args[0], args[1], args[2]);
- });
- } else if (name == "get_vm") {
- return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
- *rv = runtime::Module(vm_);
- });
- } else {
- LOG(FATAL) << "Unknown packed function: " << name;
- return PackedFunc([sptr_to_self, name](TVMArgs args, TVMRetValue* rv) {});
- }
+PackedFunc VMCompiler::GetFunction(const std::string& name,
+ const std::shared_ptr<ModuleNode>& sptr_to_self) {
+ if (name == "compile") {
+ return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+ CHECK_EQ(args.num_args, 3);
+ this->Compile(args[0], args[1], args[2]);
+ });
+ } else if (name == "get_vm") {
+ return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+ *rv = runtime::Module(vm_);
+ });
+ } else {
+ LOG(FATAL) << "Unknown packed function: " << name;
+ return PackedFunc([sptr_to_self, name](TVMArgs args, TVMRetValue* rv) {});
}
+}
- const char* type_key() const final {
- return "VMCompiler";
+void VMCompiler::Compile(const Module& mod_ref,
+ const TargetsMap& targets,
+ const tvm::Target& target_host) {
+ CHECK_EQ(targets.size(), 1)
+ << "Currently VM compiler doesn't support heterogeneous compilation";
+
+ InitVM();
+ targets_ = targets;
+ target_host_ = target_host;
+
+ // Run some optimizations first, this code should
+ // be moved to pass manager.
+ context_.module = OptimizeModule(mod_ref);
+
+ // Populate the global map.
+ //
+ // This maps global variables to a global index
+ // in the VMFunction table.
+ PopulateGlobalMap();
+
+ // Next we populate constant map.
+ auto constant_analysis_result = LayoutConstantPool(context_.module);
+ context_.const_map = std::get<0>(constant_analysis_result);
+ context_.const_tensor_shape_map = std::get<1>(constant_analysis_result);
+
+ // Next we get ready by allocating space for
+ // the global state.
+ vm_->functions.resize(context_.module->functions.size());
+ vm_->constants.resize(context_.const_map.size() + context_.const_tensor_shape_map.size());
+
+ for (auto pair : context_.const_map) {
+ vm_->constants[pair.second] = Object::Tensor(pair.first->data);
}
- std::shared_ptr<VirtualMachine> GetVirtualMachine() const {
- return vm_;
+ for (auto pair : context_.const_tensor_shape_map) {
+ vm_->constants[pair.second.first] = Object::Tensor(pair.second.second);
}
- void Compile(const Module& mod_ref,
- const TargetsMap& targets,
- const tvm::Target& target_host) {
- CHECK_EQ(targets.size(), 1)
- << "Currently VM compiler doesn't support heterogeneous compilation";
- targets_ = targets;
- target_host_ = target_host;
- vm_ = std::make_shared<VirtualMachine>();
-
- // Run some optimizations first, this code should
- // be moved to pass manager.
- context_.module = OptimizeModule(mod_ref);
-
- // Populate the global map.
- //
- // This maps global variables to a global index
- // in the VMFunction table.
- PopulateGlobalMap();
+ for (auto named_func : context_.module->functions) {
+ auto gvar = named_func.first;
+ auto func = named_func.second;
+ VMFunctionCompiler func_compiler(&context_, targets_);
+ auto vm_func = func_compiler.Compile(gvar, func);
- // Next we populate constant map.
- auto constant_analysis_result = LayoutConstantPool(context_.module);
- context_.const_map = std::get<0>(constant_analysis_result);
- context_.const_tensor_shape_map = std::get<1>(constant_analysis_result);
-
- // Next we get ready by allocating space for
- // the global state.
- vm_->functions.resize(context_.module->functions.size());
- vm_->constants.resize(context_.const_map.size() + context_.const_tensor_shape_map.size());
-
- for (auto pair : context_.const_map) {
- vm_->constants[pair.second] = Object::Tensor(pair.first->data);
- }
-
- for (auto pair : context_.const_tensor_shape_map) {
- vm_->constants[pair.second.first] = Object::Tensor(pair.second.second);
- }
-
- for (auto named_func : context_.module->functions) {
- auto gvar = named_func.first;
- auto func = named_func.second;
- VMFunctionCompiler func_compiler(&context_, targets_);
- auto vm_func = func_compiler.Compile(gvar, func);
-
- size_t func_index = context_.global_map.at(gvar);
- CHECK(func_index < vm_->functions.size());
- vm_->functions[func_index] = vm_func;
- }
+ size_t func_index = context_.global_map.at(gvar);
+ CHECK(func_index < vm_->functions.size());
+ vm_->functions[func_index] = vm_func;
+ }
#if USE_RELAY_DEBUG
- for (auto vm_func : vm_->functions) {
- DLOG(INFO) << vm_func << "-------------";
- }
+ for (auto vm_func : vm_->functions) {
+ DLOG(INFO) << vm_func << "-------------";
+ }
#endif // USE_RELAY_DEBUG
- LibraryCodegen();
+ LibraryCodegen();
- for (auto gv : context_.global_map) {
- vm_->global_map.insert({gv.first->name_hint, gv.second});
- }
+ for (auto gv : context_.global_map) {
+ vm_->global_map.insert({gv.first->name_hint, gv.second});
}
+}
- protected:
- Module OptimizeModule(const Module& mod) {
- // TODO(@icemelon9): check number of targets and build config, add more optimization pass
- transform::Sequential seq({transform::SimplifyInference(),
- transform::ToANormalForm(),
- transform::InlinePrimitives(),
- transform::LambdaLift(),
- transform::InlinePrimitives(),
- transform::FuseOps()});
- auto pass_ctx = transform::PassContext::Create();
- tvm::With<relay::transform::PassContext> ctx(pass_ctx);
- return seq(mod);
- }
-
- void PopulateGlobalMap() {
- // First we populate global map.
- size_t global_index = 0;
- for (auto named_func : context_.module->functions) {
- auto gvar = named_func.first;
- context_.global_map.insert({gvar, global_index++});
- }
- }
+Module VMCompiler::OptimizeModule(const Module& mod) {
+ // TODO(@icemelon9): check number of targets and build config, add more optimization pass
+ transform::Sequential seq({transform::SimplifyInference(),
+ transform::ToANormalForm(),
+ transform::InlinePrimitives(),
+ transform::LambdaLift(),
+ transform::InlinePrimitives(),
+ transform::FuseOps()});
+ auto pass_ctx = transform::PassContext::Create();
+ tvm::With<relay::transform::PassContext> ctx(pass_ctx);
+ return seq(mod);
+}
- void LibraryCodegen() {
- auto const& lowered_funcs = context_.lowered_funcs;
- if (lowered_funcs.size() == 0) {
- return;
- }
- // TODO(@icemelon9): support heterogeneous targets
- Target target;
- for (auto kv : targets_) {
- target = kv.second;
- }
- if (const auto* f = runtime::Registry::Get("relay.backend.build")) {
- runtime::Module mod =
- (*f)(tvm::Array<LoweredFunc>(lowered_funcs.begin(), lowered_funcs.end()), target,
- target_host_);
- CHECK(mod.operator->());
- vm_->lib = mod;
- } else {
- LOG(FATAL) << "relay.backend.build is not registered";
- }
- size_t primitive_index = 0;
- for (auto lfunc : lowered_funcs) {
- vm_->primitive_map.insert({lfunc->name, primitive_index++});
- }
+void VMCompiler::PopulateGlobalMap() {
+ // First we populate global map.
+ size_t global_index = 0;
+ for (auto named_func : context_.module->functions) {
+ auto gvar = named_func.first;
+ context_.global_map.insert({gvar, global_index++});
}
+}
- protected:
- /*! \brief Target devices. */
- TargetsMap targets_;
- /*! \brief Target host device. */
- tvm::Target target_host_;
- /*! \brief Global shared meta data */
- VMCompilerContext context_;
- /*! \brief Compiled virtual machine. */
- std::shared_ptr<VirtualMachine> vm_;
-};
+void VMCompiler::LibraryCodegen() {
+ auto const& lowered_funcs = context_.lowered_funcs;
+ if (lowered_funcs.size() == 0) {
+ return;
+ }
+ // TODO(@icemelon9): support heterogeneous targets
+ Target target;
+ for (auto kv : targets_) {
+ target = kv.second;
+ }
+ if (const auto* f = runtime::Registry::Get("relay.backend.build")) {
+ runtime::Module mod =
+ (*f)(tvm::Array<LoweredFunc>(lowered_funcs.begin(), lowered_funcs.end()), target,
+ target_host_);
+ CHECK(mod.operator->());
+ vm_->lib = mod;
+ } else {
+ LOG(FATAL) << "relay.backend.build is not registered";
+ }
+ size_t primitive_index = 0;
+ for (auto lfunc : lowered_funcs) {
+ vm_->primitive_map.insert({lfunc->name, primitive_index++});
+ }
+}
runtime::Module CreateVMCompiler() {
std::shared_ptr<VMCompiler> exec = std::make_shared<VMCompiler>();
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2019 by Contributors
+ * \file src/relay/backend/vm/compiler.h
+ * \brief A compiler from relay::Module to the VM byte code.
+ */
+
+#ifndef TVM_RELAY_BACKEND_VM_COMPILER_H_
+#define TVM_RELAY_BACKEND_VM_COMPILER_H_
+
+#include <tvm/relay/error.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/interpreter.h>
+#include <tvm/logging.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/vm.h>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+#include "../../../runtime/vm/profiler/vm.h"
+#include "../../../runtime/vm/naive_allocator.h"
+#include "../../backend/compile_engine.h"
+#include "../../pass/pass_util.h"
+
+namespace tvm {
+namespace relay {
+namespace vm {
+
+using namespace tvm::runtime;
+using namespace tvm::runtime::vm;
+using namespace relay::transform;
+
+template <typename T, typename U>
+using NodeMap = std::unordered_map<T, U, NodeHash, NodeEqual>;
+using TagMap = NodeMap<tvm::relay::Constructor, Index>;
+using TagNameMap = std::unordered_map<size_t, tvm::relay::Constructor>;
+using GlobalMap = NodeMap<GlobalVar, Index>;
+using ConstMap = NodeMap<Constant, Index>;
+using ConstTensorShapeMap = NodeMap<TensorType, std::pair<Index, NDArray>>;
+using TargetsMap = Map<tvm::Integer, tvm::Target>;
+
+struct VMCompilerContext {
+ // The module context for the compilation
+ Module module;
+ // Error reporter
+ ErrorReporter err_reporter;
+ // Map from a unique integer to ADT constructor tag
+ TagNameMap tag_index_map;
+ // Map from ADT constructor tag to a unique integer
+ TagMap tag_map;
+ // Map from global var to a unique integer
+ GlobalMap global_map;
+ // Map from Const object to its index in const pool
+ ConstMap const_map;
+ // Map from Const tensor shape to its index in const pool
+ ConstTensorShapeMap const_tensor_shape_map;
+ // List of lowered functions
+ std::vector<LoweredFunc> lowered_funcs;
+ // The functions that have been lowered.
+ std::unordered_map<LoweredFunc, size_t, NodeHash, NodeEqual> seen_funcs;
+};
+
+
+class VMCompiler : public runtime::ModuleNode {
+ public:
+ virtual ~VMCompiler() {}
+
+ virtual PackedFunc GetFunction(const std::string& name,
+ const std::shared_ptr<ModuleNode>& sptr_to_self);
+
+ const char* type_key() const {
+ return "VMCompiler";
+ }
+
+ std::shared_ptr<VirtualMachine> GetVirtualMachine() const {
+ return vm_;
+ }
+
+ virtual void InitVM() {
+ vm_ = std::make_shared<VirtualMachine>();
+ }
+
+ void Compile(const Module& mod_ref,
+ const TargetsMap& targets,
+ const tvm::Target& target_host);
+
+ protected:
+ Module OptimizeModule(const Module& mod);
+
+ void PopulateGlobalMap();
+
+ void LibraryCodegen();
+
+ protected:
+ /*! \brief Target devices. */
+ TargetsMap targets_;
+ /*! \brief Target host device. */
+ tvm::Target target_host_;
+ /*! \brief Global shared meta data */
+ VMCompilerContext context_;
+ /*! \brief Compiled virtual machine. */
+ std::shared_ptr<VirtualMachine> vm_;
+};
+
+} // namespace vm
+} // namespace relay
+} // namespace tvm
+
+#endif // TVM_RELAY_BACKEND_VM_COMPILER_H_
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2019 by Contributors
+ * \file src/relay/backend/vm/profiler/compiler.cc
+ * \brief A compiler from relay::Module to the VM byte code.
+ */
+
+#include "../../../../runtime/vm/profiler/vm.h"
+#include "../compiler.h"
+
+namespace tvm {
+namespace relay {
+namespace vm {
+
+class VMCompilerDebug : public VMCompiler {
+ public:
+ VMCompilerDebug() {}
+ void InitVM() override { vm_ = std::make_shared<VirtualMachineDebug>(); }
+ virtual ~VMCompilerDebug() {}
+};
+
+runtime::Module CreateVMCompilerDebug() {
+ std::shared_ptr<VMCompilerDebug> exec = std::make_shared<VMCompilerDebug>();
+ return runtime::Module(exec);
+}
+
+TVM_REGISTER_GLOBAL("relay._vm._VMCompilerProfiler")
+ .set_body([](TVMArgs args, TVMRetValue* rv) {
+ *rv = CreateVMCompilerDebug();
+ });
+
+} // namespace vm
+} // namespace relay
+} // namespace tvm
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2019 by Contributors
+ * \file src/runtime/vm/profiler/vm.cc
+ * \brief The Relay debug virtual machine.
+ */
+
+#include <tvm/runtime/registry.h>
+#include <tvm/runtime/vm.h>
+
+#include <algorithm>
+#include <chrono>
+#include <iomanip>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "vm.h"
+
+namespace tvm {
+namespace runtime {
+namespace vm {
+
+PackedFunc VirtualMachineDebug::GetFunction(
+    const std::string& name, const std::shared_ptr<ModuleNode>& sptr_to_self) {
+  if (name == "get_stat") {
+    // Render the per-op timing statistics accumulated by InvokePacked
+    // as a human-readable table: name, invoke count, sum/mean/min/max (us).
+    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+      double total_duration = 0.0;
+      std::ostringstream os;
+      os << std::setw(30) << std::left << "#OpName"
+         << "\t" << std::setw(10) << std::left << "#InvokeCount"
+         << "\t"
+         << "#Duration(us): Sum/Mean/Min/Max" << std::endl;
+
+      for (auto kv : op_durations) {
+        // kv.second is the duration list; avoid a redundant map lookup.
+        const auto& vals = kv.second;
+        auto sum = std::accumulate(vals.begin(), vals.end(), 0.0);
+        auto mean = sum / static_cast<double>(vals.size());
+        auto min_value = *std::min_element(vals.begin(), vals.end());
+        auto max_value = *std::max_element(vals.begin(), vals.end());
+
+        os << std::setw(30) << std::left << packed_index_map[kv.first] << "\t"
+           << std::setw(10) << std::left << op_invokes[kv.first] << "\t"
+           << sum << "/" << mean << "/" << min_value << "/" << max_value << std::endl;
+
+        total_duration += sum;
+      }
+      os << "Total Duration " << total_duration << " us" << std::endl;
+      *rv = os.str();
+    });
+  } else if (name == "init") {
+    // Expects pairs of (device_type, device_id) arguments.
+    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+      CHECK_EQ(args.size() % 2, 0);
+      std::vector<TVMContext> contexts;
+      for (int i = 0; i < args.size() / 2; ++i) {
+        TVMContext ctx;
+        int device_type = args[i * 2];
+        ctx.device_type = DLDeviceType(device_type);
+        ctx.device_id = args[i * 2 + 1];
+        contexts.push_back(ctx);
+      }
+      this->Init(contexts);
+    });
+  } else {
+    // Fall back to the base VirtualMachine's packed functions.
+    return VirtualMachine::GetFunction(name, sptr_to_self);
+  }
+}
+
+void VirtualMachineDebug::Init(const std::vector<TVMContext>& ctxs) {
+ VirtualMachine::Init(ctxs);
+ for (auto kv : primitive_map) {
+ packed_index_map[kv.second] = kv.first;
+ op_invokes[kv.second] = 0;
+ }
+}
+
+void VirtualMachineDebug::InvokePacked(Index packed_index,
+ const PackedFunc& func, Index arg_count,
+ Index output_size,
+ const std::vector<Object>& args) {
+ auto ctx = VirtualMachine::GetParamsContext();
+ auto op_begin = std::chrono::high_resolution_clock::now();
+ VirtualMachine::InvokePacked(packed_index, func, arg_count, output_size,
+ args);
+ TVMSynchronize(ctx.device_type, ctx.device_id, nullptr);
+ auto op_end = std::chrono::high_resolution_clock::now();
+ double op_duration =
+ std::chrono::duration_cast<std::chrono::duration<double> >(op_end -
+ op_begin)
+ .count();
+
+ op_durations[packed_index].push_back(op_duration * 1e6);
+ op_invokes[packed_index] += 1;
+}
+
+} // namespace vm
+} // namespace runtime
+} // namespace tvm
--- /dev/null
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2019 by Contributors
+ * \file src/runtime/vm/profiler/vm.h
+ * \brief The Relay debug virtual machine.
+ */
+
+#ifndef TVM_RUNTIME_VM_PROFILER_VM_H_
+#define TVM_RUNTIME_VM_PROFILER_VM_H_
+
+#include <tvm/runtime/vm.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace tvm {
+namespace runtime {
+namespace vm {
+
+class VirtualMachineDebug : public VirtualMachine {
+ public:
+ VirtualMachineDebug() : VirtualMachine() {}
+
+ PackedFunc GetFunction(const std::string& name,
+ const std::shared_ptr<ModuleNode>& sptr_to_self) final;
+
+ void InvokePacked(Index packed_index, const PackedFunc& func, Index arg_count,
+ Index output_size, const std::vector<Object>& args) final;
+
+ ~VirtualMachineDebug() {}
+
+ private:
+ void Init(const std::vector<TVMContext>& ctxs);
+
+ std::unordered_map<Index, std::string> packed_index_map;
+ std::unordered_map<Index, std::vector<double>> op_durations;
+ std::unordered_map<Index, int> op_invokes;
+};
+
+} // namespace vm
+} // namespace runtime
+} // namespace tvm
+
+#endif // TVM_RUNTIME_VM_PROFILER_VM_H_
return Invoke(this->functions[func_index], args);
}
-void InvokePacked(const PackedFunc& func, Index arg_count, Index output_size,
- const std::vector<Object>& args) {
+void VirtualMachine::InvokePacked(Index packed_index, const PackedFunc& func,
+ Index arg_count, Index output_size,
+ const std::vector<Object>& args) {
size_t arity = 0;
for (Index i = 0; i < arg_count; i++) {
if (args[i].ptr_->tag == ObjectTag::kDatatype) {
for (Index i = 0; i < arity; ++i) {
args.push_back(ReadRegister(instr.packed_args[i]));
}
- InvokePacked(func, arity, instr.output_size, args);
+ InvokePacked(instr.packed_index, func, arity, instr.output_size, args);
for (Index i = 0; i < instr.output_size; ++i) {
WriteRegister(instr.packed_args[instr.arity - instr.output_size + i],
args[instr.arity - instr.output_size + i]);
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import os
+import tvm
+import numpy as np
+
+from nose.tools import nottest
+from tvm import relay
+from tvm.relay.testing import resnet
+
+@nottest
+def test_basic():
+ mod, params = resnet.get_workload()
+ compiler = relay.profiler_vm.VMCompilerProfiler()
+ target = 'llvm'
+ ctx = tvm.cpu()
+ vm = compiler.compile(mod, target)
+ vm.init(ctx)
+ vm.load_params(params)
+
+ data = np.random.rand(1, 3, 224, 224).astype('float32')
+ res = vm.invoke("main", [data])
+ print("\n{}".format(vm.get_stat()))
+
+if __name__ == "__main__":
+ test_basic()