From: Wei Chen Date: Wed, 21 Aug 2019 04:28:05 +0000 (-0700) Subject: [Relay][VM]VM Profiler (#3727) X-Git-Tag: upstream/0.7.0~2013 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=95f12e313734e0469653eea4d364f04fd3839353;p=platform%2Fupstream%2Ftvm.git [Relay][VM]VM Profiler (#3727) * [Relay][VM]VM debugger * Report mean/min/max for op duration * Typos * Lint * Lint * Lint * Support build debug VM in CMake * Lint * Enable VM debug in unit test * Disable debug vm test until new docker image is built * Add device sync code * Fix qnn unit test * Disable vm debug by default * Rename files * Rename classes * Fix comment * Fix comment --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 43bf5ea..f2c711b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,10 +131,33 @@ file(GLOB COMPILER_SRCS src/schedule/*.cc ) -file(GLOB_RECURSE RELAY_SRCS - src/relay/*.cc +file(GLOB_RECURSE RELAY_OP_SRCS + src/relay/op/*.cc ) -list(APPEND COMPILER_SRCS ${RELAY_SRCS}) +file(GLOB_RECURSE RELAY_PASS_SRCS + src/relay/pass/*.cc + ) +file(GLOB RELAY_BACKEND_SRCS + src/relay/backend/*.cc + src/relay/backend/vm/*.cc + ) +file(GLOB_RECURSE RELAY_IR_SRCS + src/relay/ir/*.cc + ) +file(GLOB_RECURSE RELAY_QNN_SRCS + src/relay/qnn/*.cc +) +list(APPEND COMPILER_SRCS ${RELAY_OP_SRCS}) +list(APPEND COMPILER_SRCS ${RELAY_PASS_SRCS}) +list(APPEND COMPILER_SRCS ${RELAY_BACKEND_SRCS}) +list(APPEND COMPILER_SRCS ${RELAY_IR_SRCS}) +list(APPEND COMPILER_SRCS ${RELAY_QNN_SRCS}) + +if(USE_VM_PROFILER) + message(STATUS "Build compiler with Relay VM profiler support...") + file(GLOB BACKEND_VM_PROFILER_SRCS src/relay/backend/vm/profiler/*.cc) + list(APPEND COMPILER_SRCS ${BACKEND_VM_PROFILER_SRCS}) +endif(USE_VM_PROFILER) file(GLOB DATATYPE_SRCS src/codegen/datatype/*.cc) list(APPEND COMPILER_SRCS ${DATATYPE_SRCS}) @@ -198,6 +221,12 @@ if(USE_GRAPH_RUNTIME) endif(USE_GRAPH_RUNTIME_DEBUG) endif(USE_GRAPH_RUNTIME) +if(USE_VM_PROFILER) + message(STATUS "Build with Relay VM profiler support...") + 
file(GLOB RUNTIME_VM_PROFILER_SRCS src/runtime/vm/profiler/*.cc) + list(APPEND RUNTIME_SRCS ${RUNTIME_VM_PROFILER_SRCS}) +endif(USE_VM_PROFILER) + # Module rules include(cmake/modules/VTA.cmake) include(cmake/modules/CUDA.cmake) diff --git a/Jenkinsfile b/Jenkinsfile index b9a02b1..6134023 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -147,6 +147,7 @@ stage('Build') { echo set\\(USE_GRAPH_RUNTIME ON\\) >> config.cmake echo set\\(USE_STACKVM_RUNTIME ON\\) >> config.cmake echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake + echo set\\(USE_VM_PROFILER ON\\) >> config.cmake echo set\\(USE_ANTLR ON\\) >> config.cmake echo set\\(USE_BLAS openblas\\) >> config.cmake echo set\\(CMAKE_CXX_COMPILER g++\\) >> config.cmake @@ -164,6 +165,7 @@ stage('Build') { echo set\\(USE_VULKAN ON\\) >> config.cmake echo set\\(USE_MICRO ON\\) >> config.cmake echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake + echo set\\(USE_VM_PROFILER ON\\) >> config.cmake echo set\\(CMAKE_CXX_COMPILER clang-7\\) >> config.cmake echo set\\(CMAKE_CXX_FLAGS -Werror\\) >> config.cmake """ @@ -182,6 +184,7 @@ stage('Build') { echo set\\(USE_SORT ON\\) >> config.cmake echo set\\(USE_MICRO ON\\) >> config.cmake echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake + echo set\\(USE_VM_PROFILER ON\\) >> config.cmake echo set\\(USE_LLVM llvm-config-8\\) >> config.cmake echo set\\(USE_NNPACK ON\\) >> config.cmake echo set\\(NNPACK_PATH /NNPACK/build/\\) >> config.cmake @@ -212,6 +215,7 @@ stage('Build') { echo set\\(USE_SORT ON\\) >> config.cmake echo set\\(USE_RPC ON\\) >> config.cmake echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake + echo set\\(USE_VM_PROFILER ON\\) >> config.cmake echo set\\(USE_LLVM llvm-config-4.0\\) >> config.cmake echo set\\(CMAKE_CXX_COMPILER g++\\) >> config.cmake echo set\\(CMAKE_CXX_FLAGS -Werror\\) >> config.cmake diff --git a/cmake/config.cmake b/cmake/config.cmake index 988383a..a8ed966 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -88,6 +88,9 @@ 
set(USE_GRAPH_RUNTIME ON) # Whether enable additional graph debug functions set(USE_GRAPH_RUNTIME_DEBUG OFF) +# Whether enable additional vm profiler functions +set(USE_VM_PROFILER OFF) + # Whether build with LLVM support # Requires LLVM version >= 4.0 # diff --git a/include/tvm/runtime/vm.h b/include/tvm/runtime/vm.h index f5f6e29..fe87188 100644 --- a/include/tvm/runtime/vm.h +++ b/include/tvm/runtime/vm.h @@ -375,8 +375,41 @@ struct VMFrame { */ class VirtualMachine : public runtime::ModuleNode { public: - PackedFunc GetFunction(const std::string& name, - const std::shared_ptr& sptr_to_self) final; + /*! + * \brief Get a PackedFunc from module. + * + * The PackedFunc may not be fully initialized, + * there might still be first time running overhead when + * executing the function on certain devices. + * For benchmarking, use prepare to eliminate this overhead. + * + * \param name the name of the function. + * \param sptr_to_self The shared_ptr that points to this module node. + * + * \return PackedFunc(nullptr) when it is not available. + * + * \note The function will always remain valid. + * If the function needs resource from the module(e.g. late linking), + * it should capture sptr_to_self. + */ + virtual PackedFunc GetFunction(const std::string& name, + const std::shared_ptr& sptr_to_self); + + /*! + * \brief Invoke a PackedFunction + * + * \param packed_index The offset of the PackedFunction in all functions. + * \param func The PackedFunction to be invoked. + * \param arg_count The number of arguments to the PackedFunction. + * \param output_size The number of outputs of the PackedFunction. + * \param args Arguments to the PackedFunction. + * + * \note The return value will be stored in the last output_size slots of args. 
+ */ + virtual void InvokePacked(Index packed_index, const PackedFunc& func, Index arg_count, + Index output_size, const std::vector& args); + + virtual ~VirtualMachine() {} const char* type_key() const final { return "VirtualMachine"; @@ -456,6 +489,10 @@ class VirtualMachine : public runtime::ModuleNode { */ void RunLoop(); + /*! \brief Get device context for params. + */ + TVMContext GetParamsContext() const; + /*! * \brief Load parameters from the parameter bytearray. * \param params The binary file that contains parameters. @@ -478,9 +515,6 @@ class VirtualMachine : public runtime::ModuleNode { */ void InvokeGlobal(const VMFunction& func, const std::vector& args); - /*! \brief Get device context for params. - */ - TVMContext GetParamsContext() const; /*! \brief The parameter name to data mapping. */ std::unordered_map params_; diff --git a/python/tvm/relay/__init__.py b/python/tvm/relay/__init__.py index 01baa00..8271244 100644 --- a/python/tvm/relay/__init__.py +++ b/python/tvm/relay/__init__.py @@ -34,6 +34,7 @@ from . import debug from . import param_dict from . import feature from .backend import vm +from .backend import profiler_vm from .backend import serializer from .backend import deserializer from .backend import vmobj diff --git a/python/tvm/relay/backend/profiler_vm.py b/python/tvm/relay/backend/profiler_vm.py new file mode 100644 index 0000000..3adbeca --- /dev/null +++ b/python/tvm/relay/backend/profiler_vm.py @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=no-else-return, unidiomatic-typecheck, undefined-variable, invalid-name +""" +The Relay Virtual Machine profiler. + +Provides extra APIs for profiling vm execution. +""" +import tvm +from . import vm, _vm + +def _update_target(target): + target = target if target else tvm.target.current_target() + if target is None: + raise ValueError("Target is not set in env or passed as argument.") + + tgts = {} + if isinstance(target, (str, tvm.target.Target)): + dev_type = tvm.expr.IntImm("int32", tvm.nd.context(str(target)).device_type) + tgts[dev_type] = tvm.target.create(target) + elif isinstance(target, dict): + for dev, tgt in target.items(): + dev_type = tvm.expr.IntImm("int32", tvm.nd.context(dev).device_type) + tgts[dev_type] = tvm.target.create(tgt) + else: + raise TypeError("target is expected to be str, tvm.target.Target, " + + "or dict of str to str/tvm.target.Target, but received " + + "{}".format(type(target))) + return tgts + +class VMCompilerProfiler(vm.VMCompiler): + """Build Relay module to run on VM runtime.""" + def __init__(self): + super().__init__() + self.mod = _vm._VMCompilerProfiler() + self._compile = self.mod["compile"] + self._get_vm = self.mod["get_vm"] + + def compile(self, mod, target=None, target_host=None): + """ + Parameters + ---------- + mod : relay.Module + The Relay module to build. + + target : str, :any:`tvm.target.Target`, or dict of str(i.e. + device/context name) to str/tvm.target.Target, optional + For heterogeneous compilation, it is a dictionary indicating context + to target mapping. 
For homogeneous compilation, it is a build target. + + target_host : str or :any:`tvm.target.Target`, optional + Host compilation target, if target is device. + When TVM compiles device specific program such as CUDA, + we also need host(CPU) side code to interact with the driver + to setup the dimensions and parameters correctly. + target_host is used to specify the host side codegen target. + By default, llvm is used if it is enabled, + otherwise a stackvm interpreter is used. + + Returns + ------- + vm : VirtualMachineProfiler + The profile VM runtime. + """ + target = _update_target(target) + self._compile(mod, target, target_host) + return VirtualMachineProfiler(self._get_vm()) + +class VirtualMachineProfiler(vm.VirtualMachine): + """Relay profile VM runtime.""" + def __init__(self, mod): + super().__init__(mod) + self._get_stat = self.mod["get_stat"] + + def get_stat(self): + return self._get_stat() diff --git a/src/relay/backend/vm/compiler.cc b/src/relay/backend/vm/compiler.cc index 853cd30..17de083 100644 --- a/src/relay/backend/vm/compiler.cc +++ b/src/relay/backend/vm/compiler.cc @@ -30,12 +30,17 @@ #include #include #include +#include +#include +#include +#include #include #include #include #include "../../../runtime/vm/naive_allocator.h" #include "../../backend/compile_engine.h" #include "../../pass/pass_util.h" +#include "compiler.h" namespace tvm { namespace relay { @@ -56,36 +61,6 @@ using namespace relay::transform; // (@jroesch): VM passes, eventually declare as passes. 
bool IsClosure(const Function& func); -template -using NodeMap = std::unordered_map; -using TagMap = NodeMap; -using TagNameMap = std::unordered_map; -using GlobalMap = NodeMap; -using ConstMap = NodeMap; -using ConstTensorShapeMap = NodeMap>; -using TargetsMap = Map; - -struct VMCompilerContext { - // The module context for the compilation - Module module; - // Error reporter - ErrorReporter err_reporter; - // Map from a unique integer to ADT constructor tag - TagNameMap tag_index_map; - // Map from ADT constructor tag to a unique integer - TagMap tag_map; - // Map from global var to a unique integer - GlobalMap global_map; - // Map from Const object to its index in const pool - ConstMap const_map; - // Map from Const tensor shape to its index in const pool - ConstTensorShapeMap const_tensor_shape_map; - // List of lowered functions - std::vector lowered_funcs; - // The functions that have been lowered. - std::unordered_map seen_funcs; -}; - // Compute the constant pool, i.e a mapping from Constant node to constant index. 
struct ConstantPool : ExprVisitor { std::set visited; @@ -664,152 +639,131 @@ class VMFunctionCompiler : ExprFunctor { }; -class VMCompiler : public runtime::ModuleNode { - public: - PackedFunc GetFunction(const std::string& name, - const std::shared_ptr& sptr_to_self) final { - if (name == "compile") { - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - CHECK_EQ(args.num_args, 3); - this->Compile(args[0], args[1], args[2]); - }); - } else if (name == "get_vm") { - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - *rv = runtime::Module(vm_); - }); - } else { - LOG(FATAL) << "Unknown packed function: " << name; - return PackedFunc([sptr_to_self, name](TVMArgs args, TVMRetValue* rv) {}); - } +PackedFunc VMCompiler::GetFunction(const std::string& name, + const std::shared_ptr& sptr_to_self) { + if (name == "compile") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK_EQ(args.num_args, 3); + this->Compile(args[0], args[1], args[2]); + }); + } else if (name == "get_vm") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + *rv = runtime::Module(vm_); + }); + } else { + LOG(FATAL) << "Unknown packed function: " << name; + return PackedFunc([sptr_to_self, name](TVMArgs args, TVMRetValue* rv) {}); } +} - const char* type_key() const final { - return "VMCompiler"; +void VMCompiler::Compile(const Module& mod_ref, + const TargetsMap& targets, + const tvm::Target& target_host) { + CHECK_EQ(targets.size(), 1) + << "Currently VM compiler doesn't support heterogeneous compilation"; + + InitVM(); + targets_ = targets; + target_host_ = target_host; + + // Run some optimizations first, this code should + // be moved to pass manager. + context_.module = OptimizeModule(mod_ref); + + // Populate the global map. + // + // This maps global variables to a global index + // in the VMFunction table. + PopulateGlobalMap(); + + // Next we populate constant map. 
+ auto constant_analysis_result = LayoutConstantPool(context_.module); + context_.const_map = std::get<0>(constant_analysis_result); + context_.const_tensor_shape_map = std::get<1>(constant_analysis_result); + + // Next we get ready by allocating space for + // the global state. + vm_->functions.resize(context_.module->functions.size()); + vm_->constants.resize(context_.const_map.size() + context_.const_tensor_shape_map.size()); + + for (auto pair : context_.const_map) { + vm_->constants[pair.second] = Object::Tensor(pair.first->data); } - std::shared_ptr GetVirtualMachine() const { - return vm_; + for (auto pair : context_.const_tensor_shape_map) { + vm_->constants[pair.second.first] = Object::Tensor(pair.second.second); } - void Compile(const Module& mod_ref, - const TargetsMap& targets, - const tvm::Target& target_host) { - CHECK_EQ(targets.size(), 1) - << "Currently VM compiler doesn't support heterogeneous compilation"; - targets_ = targets; - target_host_ = target_host; - vm_ = std::make_shared(); - - // Run some optimizations first, this code should - // be moved to pass manager. - context_.module = OptimizeModule(mod_ref); - - // Populate the global map. - // - // This maps global variables to a global index - // in the VMFunction table. - PopulateGlobalMap(); + for (auto named_func : context_.module->functions) { + auto gvar = named_func.first; + auto func = named_func.second; + VMFunctionCompiler func_compiler(&context_, targets_); + auto vm_func = func_compiler.Compile(gvar, func); - // Next we populate constant map. - auto constant_analysis_result = LayoutConstantPool(context_.module); - context_.const_map = std::get<0>(constant_analysis_result); - context_.const_tensor_shape_map = std::get<1>(constant_analysis_result); - - // Next we get ready by allocating space for - // the global state. 
- vm_->functions.resize(context_.module->functions.size()); - vm_->constants.resize(context_.const_map.size() + context_.const_tensor_shape_map.size()); - - for (auto pair : context_.const_map) { - vm_->constants[pair.second] = Object::Tensor(pair.first->data); - } - - for (auto pair : context_.const_tensor_shape_map) { - vm_->constants[pair.second.first] = Object::Tensor(pair.second.second); - } - - for (auto named_func : context_.module->functions) { - auto gvar = named_func.first; - auto func = named_func.second; - VMFunctionCompiler func_compiler(&context_, targets_); - auto vm_func = func_compiler.Compile(gvar, func); - - size_t func_index = context_.global_map.at(gvar); - CHECK(func_index < vm_->functions.size()); - vm_->functions[func_index] = vm_func; - } + size_t func_index = context_.global_map.at(gvar); + CHECK(func_index < vm_->functions.size()); + vm_->functions[func_index] = vm_func; + } #if USE_RELAY_DEBUG - for (auto vm_func : vm_->functions) { - DLOG(INFO) << vm_func << "-------------"; - } + for (auto vm_func : vm_->functions) { + DLOG(INFO) << vm_func << "-------------"; + } #endif // USE_RELAY_DEBUG - LibraryCodegen(); + LibraryCodegen(); - for (auto gv : context_.global_map) { - vm_->global_map.insert({gv.first->name_hint, gv.second}); - } + for (auto gv : context_.global_map) { + vm_->global_map.insert({gv.first->name_hint, gv.second}); } +} - protected: - Module OptimizeModule(const Module& mod) { - // TODO(@icemelon9): check number of targets and build config, add more optimization pass - transform::Sequential seq({transform::SimplifyInference(), - transform::ToANormalForm(), - transform::InlinePrimitives(), - transform::LambdaLift(), - transform::InlinePrimitives(), - transform::FuseOps()}); - auto pass_ctx = transform::PassContext::Create(); - tvm::With ctx(pass_ctx); - return seq(mod); - } - - void PopulateGlobalMap() { - // First we populate global map. 
- size_t global_index = 0; - for (auto named_func : context_.module->functions) { - auto gvar = named_func.first; - context_.global_map.insert({gvar, global_index++}); - } - } +Module VMCompiler::OptimizeModule(const Module& mod) { + // TODO(@icemelon9): check number of targets and build config, add more optimization pass + transform::Sequential seq({transform::SimplifyInference(), + transform::ToANormalForm(), + transform::InlinePrimitives(), + transform::LambdaLift(), + transform::InlinePrimitives(), + transform::FuseOps()}); + auto pass_ctx = transform::PassContext::Create(); + tvm::With ctx(pass_ctx); + return seq(mod); +} - void LibraryCodegen() { - auto const& lowered_funcs = context_.lowered_funcs; - if (lowered_funcs.size() == 0) { - return; - } - // TODO(@icemelon9): support heterogeneous targets - Target target; - for (auto kv : targets_) { - target = kv.second; - } - if (const auto* f = runtime::Registry::Get("relay.backend.build")) { - runtime::Module mod = - (*f)(tvm::Array(lowered_funcs.begin(), lowered_funcs.end()), target, - target_host_); - CHECK(mod.operator->()); - vm_->lib = mod; - } else { - LOG(FATAL) << "relay.backend.build is not registered"; - } - size_t primitive_index = 0; - for (auto lfunc : lowered_funcs) { - vm_->primitive_map.insert({lfunc->name, primitive_index++}); - } +void VMCompiler::PopulateGlobalMap() { + // First we populate global map. + size_t global_index = 0; + for (auto named_func : context_.module->functions) { + auto gvar = named_func.first; + context_.global_map.insert({gvar, global_index++}); } +} - protected: - /*! \brief Target devices. */ - TargetsMap targets_; - /*! \brief Target host device. */ - tvm::Target target_host_; - /*! \brief Global shared meta data */ - VMCompilerContext context_; - /*! \brief Compiled virtual machine. 
*/ - std::shared_ptr vm_; -}; +void VMCompiler::LibraryCodegen() { + auto const& lowered_funcs = context_.lowered_funcs; + if (lowered_funcs.size() == 0) { + return; + } + // TODO(@icemelon9): support heterogeneous targets + Target target; + for (auto kv : targets_) { + target = kv.second; + } + if (const auto* f = runtime::Registry::Get("relay.backend.build")) { + runtime::Module mod = + (*f)(tvm::Array(lowered_funcs.begin(), lowered_funcs.end()), target, + target_host_); + CHECK(mod.operator->()); + vm_->lib = mod; + } else { + LOG(FATAL) << "relay.backend.build is not registered"; + } + size_t primitive_index = 0; + for (auto lfunc : lowered_funcs) { + vm_->primitive_map.insert({lfunc->name, primitive_index++}); + } +} runtime::Module CreateVMCompiler() { std::shared_ptr exec = std::make_shared(); diff --git a/src/relay/backend/vm/compiler.h b/src/relay/backend/vm/compiler.h new file mode 100644 index 0000000..4a2de0a --- /dev/null +++ b/src/relay/backend/vm/compiler.h @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file src/relay/backend/vm/compiler.h + * \brief A compiler from relay::Module to the VM byte code. 
+ */ + +#ifndef TVM_RELAY_BACKEND_VM_COMPILER_H_ +#define TVM_RELAY_BACKEND_VM_COMPILER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../../runtime/vm/profiler/vm.h" +#include "../../../runtime/vm/naive_allocator.h" +#include "../../backend/compile_engine.h" +#include "../../pass/pass_util.h" + +namespace tvm { +namespace relay { +namespace vm { + +using namespace tvm::runtime; +using namespace tvm::runtime::vm; +using namespace relay::transform; + +template +using NodeMap = std::unordered_map; +using TagMap = NodeMap; +using TagNameMap = std::unordered_map; +using GlobalMap = NodeMap; +using ConstMap = NodeMap; +using ConstTensorShapeMap = NodeMap>; +using TargetsMap = Map; + +struct VMCompilerContext { + // The module context for the compilation + Module module; + // Error reporter + ErrorReporter err_reporter; + // Map from a unique integer to ADT constructor tag + TagNameMap tag_index_map; + // Map from ADT constructor tag to a unique integer + TagMap tag_map; + // Map from global var to a unique integer + GlobalMap global_map; + // Map from Const object to its index in const pool + ConstMap const_map; + // Map from Const tensor shape to its index in const pool + ConstTensorShapeMap const_tensor_shape_map; + // List of lowered functions + std::vector lowered_funcs; + // The functions that have been lowered. 
+ std::unordered_map seen_funcs; +}; + + +class VMCompiler : public runtime::ModuleNode { + public: + virtual ~VMCompiler() {} + + virtual PackedFunc GetFunction(const std::string& name, + const std::shared_ptr& sptr_to_self); + + const char* type_key() const { + return "VMCompiler"; + } + + std::shared_ptr GetVirtualMachine() const { + return vm_; + } + + virtual void InitVM() { + vm_ = std::make_shared(); + } + + void Compile(const Module& mod_ref, + const TargetsMap& targets, + const tvm::Target& target_host); + + protected: + Module OptimizeModule(const Module& mod); + + void PopulateGlobalMap(); + + void LibraryCodegen(); + + protected: + /*! \brief Target devices. */ + TargetsMap targets_; + /*! \brief Target host device. */ + tvm::Target target_host_; + /*! \brief Global shared meta data */ + VMCompilerContext context_; + /*! \brief Compiled virtual machine. */ + std::shared_ptr vm_; +}; + +} // namespace vm +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_BACKEND_VM_COMPILER_H_ diff --git a/src/relay/backend/vm/profiler/compiler.cc b/src/relay/backend/vm/profiler/compiler.cc new file mode 100644 index 0000000..9fd28e8 --- /dev/null +++ b/src/relay/backend/vm/profiler/compiler.cc @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file src/relay/backend/vm/profiler/compiler.cc + * \brief A compiler from relay::Module to the VM byte code. + */ + +#include "../../../../runtime/vm/profiler/vm.h" +#include "../compiler.h" + +namespace tvm { +namespace relay { +namespace vm { + +class VMCompilerDebug : public VMCompiler { + public: + VMCompilerDebug() {} + void InitVM() override { vm_ = std::make_shared(); } + virtual ~VMCompilerDebug() {} +}; + +runtime::Module CreateVMCompilerDebug() { + std::shared_ptr exec = std::make_shared(); + return runtime::Module(exec); +} + +TVM_REGISTER_GLOBAL("relay._vm._VMCompilerProfiler") + .set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = CreateVMCompilerDebug(); + }); + +} // namespace vm +} // namespace relay +} // namespace tvm diff --git a/src/runtime/vm/profiler/vm.cc b/src/runtime/vm/profiler/vm.cc new file mode 100644 index 0000000..1d3ac83 --- /dev/null +++ b/src/runtime/vm/profiler/vm.cc @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file src/runtime/vm/profiler/vm.cc + * \brief The Relay debug virtual machine. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "vm.h" + +namespace tvm { +namespace runtime { +namespace vm { + +PackedFunc VirtualMachineDebug::GetFunction( + const std::string& name, const std::shared_ptr& sptr_to_self) { + if (name == "get_stat") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + double total_duration = 0.0; + std::ostringstream os; + os << std::setw(30) << std::left << "#OpName" + << "\t" << std::setw(10) << std::left << "#InvokeCount" + << "\t" + << "#Duration(us): Sum/Mean/Min/Max" << std::endl; + + for (auto kv : op_durations) { + auto vals = op_durations[kv.first]; + auto sum = std::accumulate(vals.begin(), vals.end(), 0.0);; + auto mean = sum / static_cast(vals.size()); + auto min_value = *std::min_element(vals.begin(), vals.end()); + auto max_value = *std::max_element(vals.begin(), vals.end()); + + os << std::setw(30) << std::left << packed_index_map[kv.first] << "\t" + << std::setw(10) << std::left << op_invokes[kv.first] << "\t" + << sum << "/" << mean << "/" << min_value << "/" << max_value << std::endl; + + total_duration += sum; + } + os << "Total Duration " << total_duration << " us" << std::endl; + *rv = os.str(); + }); + } else if (name == "init") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK_EQ(args.size() % 2, 0); + std::vector contexts; + for (int i = 0; i < args.size() / 2; ++i) { + TVMContext ctx; + int device_type = args[i * 2]; + ctx.device_type = DLDeviceType(device_type); + ctx.device_id = args[i * 2 + 1]; + contexts.push_back(ctx); + } + this->Init(contexts); + }); + } else { + return VirtualMachine::GetFunction(name, sptr_to_self); + } +} + +void VirtualMachineDebug::Init(const std::vector& ctxs) { + VirtualMachine::Init(ctxs); + for (auto kv : primitive_map) { + 
packed_index_map[kv.second] = kv.first; + op_invokes[kv.second] = 0; + } +} + +void VirtualMachineDebug::InvokePacked(Index packed_index, + const PackedFunc& func, Index arg_count, + Index output_size, + const std::vector& args) { + auto ctx = VirtualMachine::GetParamsContext(); + auto op_begin = std::chrono::high_resolution_clock::now(); + VirtualMachine::InvokePacked(packed_index, func, arg_count, output_size, + args); + TVMSynchronize(ctx.device_type, ctx.device_id, nullptr); + auto op_end = std::chrono::high_resolution_clock::now(); + double op_duration = + std::chrono::duration_cast >(op_end - + op_begin) + .count(); + + op_durations[packed_index].push_back(op_duration * 1e6); + op_invokes[packed_index] += 1; +} + +} // namespace vm +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/vm/profiler/vm.h b/src/runtime/vm/profiler/vm.h new file mode 100644 index 0000000..9906032 --- /dev/null +++ b/src/runtime/vm/profiler/vm.h @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file src/runtime/vm/profiler/vm.h + * \brief The Relay debug virtual machine. 
+ */ + +#ifndef TVM_RUNTIME_VM_PROFILER_VM_H_ +#define TVM_RUNTIME_VM_PROFILER_VM_H_ + +#include + +#include +#include +#include +#include + +namespace tvm { +namespace runtime { +namespace vm { + +class VirtualMachineDebug : public VirtualMachine { + public: + VirtualMachineDebug() : VirtualMachine() {} + + PackedFunc GetFunction(const std::string& name, + const std::shared_ptr& sptr_to_self) final; + + void InvokePacked(Index packed_index, const PackedFunc& func, Index arg_count, + Index output_size, const std::vector& args) final; + + ~VirtualMachineDebug() {} + + private: + void Init(const std::vector& ctxs); + + std::unordered_map packed_index_map; + std::unordered_map> op_durations; + std::unordered_map op_invokes; +}; + +} // namespace vm +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_VM_PROFILER_VM_H_ diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 1dacfa6..33990ae 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -721,8 +721,9 @@ Object VirtualMachine::Invoke(const std::string& name, const std::vector return Invoke(this->functions[func_index], args); } -void InvokePacked(const PackedFunc& func, Index arg_count, Index output_size, - const std::vector& args) { +void VirtualMachine::InvokePacked(Index packed_index, const PackedFunc& func, + Index arg_count, Index output_size, + const std::vector& args) { size_t arity = 0; for (Index i = 0; i < arg_count; i++) { if (args[i].ptr_->tag == ObjectTag::kDatatype) { @@ -846,7 +847,7 @@ void VirtualMachine::RunLoop() { for (Index i = 0; i < arity; ++i) { args.push_back(ReadRegister(instr.packed_args[i])); } - InvokePacked(func, arity, instr.output_size, args); + InvokePacked(instr.packed_index, func, arity, instr.output_size, args); for (Index i = 0; i < instr.output_size; ++i) { WriteRegister(instr.packed_args[instr.arity - instr.output_size + i], args[instr.arity - instr.output_size + i]); diff --git a/tests/python/unittest/test_runtime_vm_profiler.py 
b/tests/python/unittest/test_runtime_vm_profiler.py new file mode 100644 index 0000000..826b1a7 --- /dev/null +++ b/tests/python/unittest/test_runtime_vm_profiler.py @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import os +import tvm +import numpy as np + +from nose.tools import nottest +from tvm import relay +from tvm.relay.testing import resnet + +@nottest +def test_basic(): + mod, params = resnet.get_workload() + compiler = relay.profiler_vm.VMCompilerProfiler() + target = 'llvm' + ctx = tvm.cpu() + vm = compiler.compile(mod, target) + vm.init(ctx) + vm.load_params(params) + + data = np.random.rand(1, 3, 224, 224).astype('float32') + res = vm.invoke("main", [data]) + print("\n{}".format(vm.get_stat())) + +if __name__ == "__main__": + test_basic()