From: Haichen Shen Date: Fri, 24 Jul 2020 22:49:45 +0000 (-0700) Subject: [Relay][VM] Allow to config allocator type and refactor vm code structure (#6105) X-Git-Tag: upstream/0.7.0~359 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=922e0a05c8cc8fce7964d3f9907fde6981c5f72d;p=platform%2Fupstream%2Ftvm.git [Relay][VM] Allow to config allocator type and refactor vm code structure (#6105) * [Relay][VM] Allow to config allocator type and refactor vm code structure * fix doc * fix * update * trigger ci * trigger ci * trigger ci * trigger ci * fix doc warning --- diff --git a/docs/dev/virtual_machine.rst b/docs/dev/virtual_machine.rst index 5878003..059878f 100644 --- a/docs/dev/virtual_machine.rst +++ b/docs/dev/virtual_machine.rst @@ -276,11 +276,11 @@ VM. Currently, three types of objects, ``NDArray``, ``ADT``, and ``Closure`` objects, are used to represent tensor, tuple/list, and closure data, respectively. More details for each of them can be found at `include/tvm/runtime/ndarray.h`_, -`include/tvm/runtime/vm.h`_, and `include/tvm/runtime/container.h`_, respectively. +`include/tvm/runtime/vm/vm.h`_, and `include/tvm/runtime/container.h`_, respectively. .. _include/tvm/runtime/ndarray.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/ndarray.h -.. _include/tvm/runtime/vm.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/vm.h +.. _include/tvm/runtime/vm/vm.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/vm/vm.h .. _include/tvm/runtime/container.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/container.h @@ -321,7 +321,12 @@ VM Compiler An important part of this infrastructure is a compiler from Relay's full IR into a sequence of bytecode. The VM compiler transforms a ``tvm::relay::Module`` into a ``tvm::relay::vm::Executable``. The executable -contains a set of compiled functions, the compiled functions are contained in ``tvm::relay::vm::Function``. 
The functions contain metadata about the function as well as its compiled bytecode. The emitted executable object then can be loaded and run by a ``tvm::relay::vm::VirtualMachine`` object. For full definitions of the data structures, please see `include/tvm/runtime/vm.h`_. +contains a set of compiled functions, the compiled functions are contained in ``tvm::relay::vm::Function``. +The functions contain metadata about the function as well as its compiled bytecode. The emitted executable +object then can be loaded and run by a ``tvm::relay::vm::VirtualMachine`` object. For full definitions of the +data structures, please see `include/tvm/runtime/vm/executable.h`_ and `include/tvm/runtime/vm/vm.h`_. + +.. _include/tvm/runtime/vm/executable.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/vm/executable.h Optimizations ~~~~~~~~~~~~~ diff --git a/include/tvm/relay/interpreter.h b/include/tvm/relay/interpreter.h index bda73ed..8a41ab7 100644 --- a/include/tvm/relay/interpreter.h +++ b/include/tvm/relay/interpreter.h @@ -38,7 +38,6 @@ #include #include #include -#include #include namespace tvm { @@ -67,7 +66,7 @@ runtime::TypedPackedFunc CreateInterpreter(IRModule mod, DLCont Target target); /*! \brief The container type of Closures used by the interpreter. */ -class InterpreterClosureObj : public runtime::vm::ClosureObj { +class InterpreterClosureObj : public runtime::ClosureObj { public: /*! \brief The set of free variables in the closure. 
* @@ -89,13 +88,13 @@ class InterpreterClosureObj : public runtime::vm::ClosureObj { } static constexpr const char* _type_key = "interpreter.Closure"; - TVM_DECLARE_FINAL_OBJECT_INFO(InterpreterClosureObj, runtime::vm::ClosureObj); + TVM_DECLARE_FINAL_OBJECT_INFO(InterpreterClosureObj, runtime::ClosureObj); }; -class InterpreterClosure : public runtime::vm::Closure { +class InterpreterClosure : public runtime::Closure { public: TVM_DLL InterpreterClosure(tvm::Map env, Function func); - TVM_DEFINE_OBJECT_REF_METHODS(InterpreterClosure, runtime::vm::Closure, InterpreterClosureObj); + TVM_DEFINE_OBJECT_REF_METHODS(InterpreterClosure, runtime::Closure, InterpreterClosureObj); }; /*! \brief The container type of RecClosure. */ diff --git a/include/tvm/runtime/container.h b/include/tvm/runtime/container.h index 5467ae4..f8fa09d 100644 --- a/include/tvm/runtime/container.h +++ b/include/tvm/runtime/container.h @@ -1671,6 +1671,23 @@ struct PackedFuncValueConverter> { } }; +/*! + * \brief An object representing a closure. This object is used by both the + * Relay VM and interpreter. + */ +class ClosureObj : public Object { + public: + static constexpr const uint32_t _type_index = TypeIndex::kRuntimeClosure; + static constexpr const char* _type_key = "runtime.Closure"; + TVM_DECLARE_BASE_OBJECT_INFO(ClosureObj, Object); +}; + +/*! \brief reference to closure. */ +class Closure : public ObjectRef { + public: + TVM_DEFINE_OBJECT_REF_METHODS(Closure, ObjectRef, ClosureObj); +}; + } // namespace runtime // expose the functions to the root namespace. diff --git a/include/tvm/runtime/vm.h b/include/tvm/runtime/vm.h deleted file mode 100644 index cb98715..0000000 --- a/include/tvm/runtime/vm.h +++ /dev/null @@ -1,826 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file tvm/runtime/vm.h - * \brief A virtual machine for executing Relay programs. - */ -#ifndef TVM_RUNTIME_VM_H_ -#define TVM_RUNTIME_VM_H_ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -namespace tvm { -namespace runtime { -namespace vm { - -/*! - * \brief An object representing a closure. This object is used by both the - * Relay VM and interpreter. - */ -class ClosureObj : public Object { - public: - static constexpr const uint32_t _type_index = TypeIndex::kRuntimeClosure; - static constexpr const char* _type_key = "runtime.Closure"; - TVM_DECLARE_BASE_OBJECT_INFO(ClosureObj, Object); -}; - -/*! \brief reference to closure. */ -class Closure : public ObjectRef { - public: - TVM_DEFINE_OBJECT_REF_METHODS(Closure, ObjectRef, ClosureObj); -}; - -/*! - * \brief An object representing a vm closure. - */ -class VMClosureObj : public ClosureObj { - public: - /*! - * \brief The index into the function list. The function could be any - * function object that is compatible to the VM runtime. - */ - size_t func_index; - /*! \brief The free variables of the closure. */ - std::vector free_vars; - - static constexpr const char* _type_key = "vm.Closure"; - TVM_DECLARE_FINAL_OBJECT_INFO(VMClosureObj, ClosureObj); -}; - -/*! \brief reference to closure. 
*/ -class VMClosure : public Closure { - public: - VMClosure(size_t func_index, std::vector free_vars); - TVM_DEFINE_OBJECT_REF_METHODS(VMClosure, Closure, VMClosureObj); -}; - -/*! \brief Magic number for NDArray list file */ -constexpr uint64_t kTVMNDArrayListMagic = 0xF7E58D4F05049CB7; - -/*! \brief A register name. */ -using RegName = int64_t; - -/*! \brief An alias for the integer type used ubiquitously - * in the VM. - */ -using Index = int64_t; - -/*! \brief An enumeration of Relay's opcodes. - * - * The opcode is used to implement instruction - * as a tagged union. - */ -enum class Opcode { - Move = 0U, - Ret = 1U, - Invoke = 2U, - InvokeClosure = 3U, - InvokePacked = 4U, - AllocTensor = 5U, - AllocTensorReg = 6U, - AllocADT = 7U, - AllocClosure = 8U, - GetField = 9U, - If = 10U, - LoadConst = 11U, - Goto = 12U, - GetTag = 13U, - LoadConsti = 14U, - Fatal = 15U, - AllocStorage = 16U, - ShapeOf = 17U, - ReshapeTensor = 18U, -}; - -/*! \brief A single virtual machine instruction. - * - * The representation of the instruction is as - * a tagged union. - * - * The first field represents which instruction, - * and by extension which field of the union - * is active. - */ -struct Instruction { - /*! \brief The instruction opcode. */ - Opcode op; - - /*! \brief The destination register. */ - RegName dst; - - union { - struct /* AllocTensor Operands */ { - /*! \brief The storage to allocate from. */ - RegName storage; - /*! \brief The offset into the storage to allocate from. */ - Index offset; - /*! \brief The number of dimensions. */ - uint32_t ndim; - /*! \brief The shape of tensor. */ - int64_t* shape; - /*! \brief The datatype of tensor to be allocated. */ - DLDataType dtype; - } alloc_tensor; - struct /* AllocTensorReg Operands */ { - /*! \brief The storage to allocate from. */ - RegName storage; - /*! \brief The offset into the storage to allocate from. */ - Index offset; - /*! \brief The register to read the shape out of. */ - RegName shape_register; - /*! 
\brief The datatype of tensor to be allocated. */ - DLDataType dtype; - } alloc_tensor_reg; - struct /* InvokeClosure Operands */ { - /*! \brief The register containing the closure. */ - RegName closure; - /*! \brief The number of arguments to the closure. */ - Index num_closure_args; - /*! \brief The closure arguments as an array. */ - RegName* closure_args; - }; - struct /* Return Operands */ { - /*! \brief The register to return. */ - RegName result; - }; - struct /* Move Operands */ { - /*! \brief The source register for a move operation. */ - RegName from; - }; - struct /* InvokePacked Operands */ { - /*! \brief The index into the packed function table. */ - Index packed_index; - /*! \brief The arity of the packed function. */ - Index arity; - /*! \brief The number of outputs produced by the packed function. */ - Index output_size; - /*! \brief The arguments to pass to the packed function. */ - RegName* packed_args; - }; - struct /* If Operands */ { - /*! \brief The register containing the test value. */ - RegName test; - /*! \brief The register containing the target value. */ - RegName target; - /*! \brief The program counter offset for the true branch. */ - Index true_offset; - /*! \brief The program counter offset for the false branch. */ - Index false_offset; - } if_op; - struct /* Invoke Operands */ { - /*! \brief The function to call. */ - Index func_index; - /*! \brief The number of arguments to the function. */ - Index num_args; - /*! \brief The registers containing the arguments. */ - RegName* invoke_args_registers; - }; - struct /* LoadConst Operands */ { - /* \brief The index into the constant pool. */ - Index const_index; - }; - struct /* LoadConsti Operands */ { - /* \brief The index into the constant pool. */ - Index val; - } load_consti; - struct /* Jump Operands */ { - /*! \brief The jump offset. */ - Index pc_offset; - }; - struct /* Proj Operands */ { - /*! \brief The register to project from. */ - RegName object; - /*! 
\brief The field to read out. */ - Index field_index; - }; - struct /* GetTag Operands */ { - /*! \brief The register to project from. */ - RegName object; - } get_tag; - struct /* AllocADT Operands */ { - /*! \brief The datatype's constructor tag. */ - Index constructor_tag; - /*! \brief The number of fields to store in the datatype. */ - Index num_fields; - /*! \brief The fields as an array. */ - RegName* datatype_fields; - }; - struct /* AllocClosure Operands */ { - /*! \brief The index into the function table. */ - Index clo_index; - /*! \brief The number of free variables to capture. */ - Index num_freevar; - /*! \brief The free variables as an array. */ - RegName* free_vars; - }; - struct /* AllocStorage Operands */ { - /*! \brief The size of the allocation. */ - RegName allocation_size; - /*! \brief The alignment of the allocation. */ - Index alignment; - /*! \brief The hint of the dtype. */ - DLDataType dtype_hint; - } alloc_storage; - struct /* ShapeOf Operands */ { - RegName tensor; - } shape_of; - struct /* ReshapeTensor Operands */ { - RegName tensor; - RegName newshape; - } reshape_tensor; - }; - - /*! - * \brief Construct a return instruction. - * \param return_reg The register containing the return value. - * \return The return instruction. - */ - static Instruction Ret(RegName return_reg); - /*! - * \brief Construct a fatal instruction. - * \return The fatal instruction. - */ - static Instruction Fatal(); - /*! - * \brief Construct a invoke packed instruction. - * \param packed_index The index of the packed function. - * \param arity The arity of the function. - * \param output_size The number of outputs of the packed function. - * \param args The argument registers. - * \return The invoke packed instruction. - */ - static Instruction InvokePacked(Index packed_index, Index arity, Index output_size, - const std::vector& args); - /*! - * \brief Construct an allocate tensor instruction with constant shape. 
- * \param storage The storage to allocate out of. - * \param offset The offset to allocate at. - * \param shape The shape of the tensor. - * \param dtype The dtype of the tensor. - * \param dst The destination register. - * \return The allocate tensor instruction. - */ - static Instruction AllocTensor(RegName storage, Index offset, const std::vector& shape, - DLDataType dtype, RegName dst); - /*! - * \brief Construct an allocate tensor instruction with register. - * \param storage The storage to allocate out of. - * \param offset The offset into the storage to allocate from. - * \param shape_register The register containing the shape. - * \param dtype The dtype of the tensor. - * \param dst The destination register. - * \return The allocate tensor instruction. - */ - static Instruction AllocTensorReg(RegName storage, Index offset, RegName shape_register, - DLDataType dtype, RegName dst); - /*! - * \brief Construct an allocate datatype instruction. - * \param tag The datatype tag. - * \param num_fields The number of fields for the datatype. - * \param fields The registers containing the fields. - * \param dst The register name of the destination. - * \return The allocate instruction tensor. - */ - static Instruction AllocADT(Index tag, Index num_fields, const std::vector& fields, - RegName dst); - /*! - * \brief Construct an allocate closure instruction. - * \param func_index The index of the function table. - * \param num_freevar The number of free variables. - * \param free_vars The registers of the free variables. - * \param dst The destination register. - * \return The allocate closure instruction. - */ - static Instruction AllocClosure(Index func_index, Index num_freevar, - const std::vector& free_vars, RegName dst); - /*! - * \brief Construct a get field instruction. - * \param object_reg The register containing the object to project from. - * \param field_index The field to read out of the object. - * \param dst The destination register. 
- * \return The get field instruction. - */ - static Instruction GetField(RegName object_reg, Index field_index, RegName dst); - /*! - * \brief Construct a get_tag instruction. - * \param object_reg The register containing the object to project from. - * \param dst The destination register. - * \return The get_tag instruction. - */ - static Instruction GetTag(RegName object_reg, RegName dst); - /*! - * \brief Construct an if instruction. - * \param test The register containing the test value. - * \param target The register containing the target value. - * \param true_branch The offset to the true branch. - * \param false_branch The offset to the false branch. - * \return The if instruction. - */ - static Instruction If(RegName test, RegName target, Index true_branch, Index false_branch); - /*! - * \brief Construct a goto instruction. - * \param pc_offset The offset from the current pc. - * \return The goto instruction. - */ - static Instruction Goto(Index pc_offset); - /*! - * \brief Construct an invoke instruction. - * \param func_index The index of the function to invoke. - * \param args The registers containing the arguments. - * \param dst The destination register. - * \return The invoke instruction. - */ - static Instruction Invoke(Index func_index, const std::vector& args, RegName dst); - /*! - * \brief Construct an invoke closure instruction. - * \param closure The register of the closure to invoke. - * \param args The registers containing the arguments. - * \param dst The destination register. - * \return The invoke closure instruction. - */ - static Instruction InvokeClosure(RegName closure, const std::vector& args, RegName dst); - /*! - * \brief Construct a load constant instruction. - * \param const_index The index of the constant. - * \param dst The destination register. - * \return The load constant instruction. - */ - static Instruction LoadConst(Index const_index, RegName dst); - /*! - * \brief Construct a load_constanti instruction. 
- * \param val The interger constant value. - * \param dst The destination register. - * \return The load_constanti instruction. - */ - static Instruction LoadConsti(Index val, RegName dst); - /*! - * \brief Construct a move instruction. - * \param src The source register. - * \param dst The destination register. - * \return The move instruction. - */ - static Instruction Move(RegName src, RegName dst); - - /*! - * \brief Allocate a storage block. - * \param size The size of the allocation. - * \param alignment The allocation's alignment. - * \param dtype_hint The data type hint for the allocator. - * \param dst The destination to place the storage. - * \return The alloc storage instruction. - */ - static Instruction AllocStorage(RegName size, Index alignment, DLDataType dtype_hint, - RegName dst); - - /*! - * \brief Get the shape of an input tensor. - * \param tensor The input tensor. - * \param dst The destination to store the shape of the given tensor. - * \return The shape of instruction. - */ - static Instruction ShapeOf(RegName tensor, RegName dst); - - /*! - * \brief Reshape the tensor given the new shape. - * \param tensor The input tensor. - * \param newshape The shape tensor. - * \param dst The destination to store the output tensor with new shape. - * \return The reshape tensor instruction. - */ - static Instruction ReshapeTensor(RegName tensor, RegName newshape, RegName dst); - - Instruction(); - Instruction(const Instruction& instr); - Instruction& operator=(const Instruction& instr); - ~Instruction(); - - friend std::ostream& operator<<(std::ostream& os, const Instruction&); -}; - -/*! - * \brief A representation of a Relay function in the VM. - * - * Contains metadata about the compiled function, as - * well as the compiled VM instructions. - */ -struct VMFunction { - /*! \brief The function's name. */ - std::string name; - /*! \brief The function parameter names. */ - std::vector params; - /*! \brief The instructions representing the function. 
*/ - std::vector instructions; - /*! \brief The size of the frame for this function */ - Index register_file_size; - - VMFunction(const std::string& name, std::vector params, - const std::vector& instructions, Index register_file_size) - : name(name), - params(params), - instructions(instructions), - register_file_size(register_file_size) {} - - VMFunction() {} - - friend std::ostream& operator<<(std::ostream& os, const VMFunction&); -}; - -/*! - * \brief A representation of a stack frame. - * - * A stack frame is a record containing the information needed - * to restore the caller's virtual machine state after returning - * from a function call. - */ -struct VMFrame { - /*! \brief The return program counter. */ - Index pc; - /*! \brief The index into the function table, points to the caller. */ - Index func_index; - /*! \brief The number of arguments. */ - Index args; - /*! \brief A pointer into the caller function's instructions. */ - const Instruction* code; - - /*! \brief Statically allocated space for objects */ - std::vector register_file; - - /*! \brief Register in caller's frame to put return value */ - RegName caller_return_register; - - VMFrame(Index pc, Index func_index, Index args, const Instruction* code, Index register_file_size) - : pc(pc), - func_index(func_index), - args(args), - code(code), - register_file(register_file_size), - caller_return_register(0) {} -}; - -/*! - * \brief The executable emitted by the VM compiler. - * - * The executable contains information (e.g. data in different memory regions) - * to run in a virtual machine. - * - * - Global section, containing all globals. - * - Constant section, storing the constant pool. - * - Primitive name section, containing the function name of the primitive ops - * used by the virtual machine. - * - Code section, handling the VM functions and bytecode. - */ -class Executable : public ModuleNode { - public: - /*! - * \brief Get a PackedFunc from an executable module. 
- * - * \param name the name of the function. - * \param sptr_to_self The shared_ptr that points to this module node. - * - * \return PackedFunc or nullptr when it is not available. - */ - PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) final; - - /*! - * \brief Serialize the executable into global section, constant section, and - * code section. - * - * \return The binary representation of the VM. - */ - TVMByteArray Save(); - - /*! - * \brief Load the saved VM executable. - * - * \param code The bytecode in string. - * \param lib The compiled runtime library. - * - * \return exe The constructed executable. - */ - static runtime::Module Load(const std::string& code, const runtime::Module lib); - - /*! - * \brief Get the serialized form of the `functions`. This is - * essentially bytecode serialization. - * - * \return The serialized vm bytecode. - * - * \note The bytecode is in the following format: - * func_name reg_file_size num_instructions - * param1 param2 ... paramM - * instruction1 - * instruction2 - * ... - * instructionN - * - * Each instruction is printed in the following format: - * opcode num_fields field1 ... fieldX # The text format. - * - * Serializing an `Instruction` requires us to deal with the bytecode. Each line - * of the instructions could be serialized as the following format: - * hash, opcode, f1, f2, ..., fX, field with variable length - * 1. hash: the hash of the instruction. This number will be used to help us - * validate if an instruction is well-formed during deserialization. - * 2. opcode: the opcode code of the instruction. - * 3. f1, f2, ..., fX. These fields together represent the fixed fields in - * an instruction, e.g., `from` and `dst` fields of a `Move` instruction. For - * example, `DLDataType` will be unpacked into three fields (code, bits, lanes). - * 4. 
The rest of the line indicates the field with variable length, e.g., - * the shape of a tensor, the args used by an `InvokPacked` instruction, etc. - - * The field starting from # is only used for debugging. The serialized code - * doesn't contain it, therefore the deserializer doens't need to handle it. - */ - std::string GetBytecode() const; - - /*! - * \brief Print the detailed statistics of the given code, i.e. number of - * globls and constants, etc. - */ - std::string Stats() const; - - /*! - * \brief Get the `lib` module in an executable. Users have the flexibility to call - * `export_library` from the frontend to save the library to disk. - * - * \return The runtime module that contains the hardwre dependent code. - */ - runtime::Module GetLib() const { return lib; } - - /*! - * \brief Get the arity of the VM Fucntion. - * \param func Function name. - * \return The number of parameters. - */ - int GetFunctionArity(std::string func) const; - - /*! - * \brief Get the parameter name given the function name and parameter index. - * \param func Function name. - * \param index Parameter index. - * \return The parameter name. - */ - std::string GetFunctionParameterName(std::string func, uint32_t index) const; - - virtual ~Executable() {} - - const char* type_key() const final { return "VMExecutable"; } - - /*! \brief The runtime module/library that contains both the host and also the device - * code when executing on non-CPU devices. */ - runtime::Module lib; - /*! \brief The global constant pool. */ - std::vector constants; - /*! \brief A map from globals (as strings) to their index in the function map. */ - std::unordered_map global_map; - /*! \brief A mapping from the packed function (as string) to the index that - * corresponds to the position of the `packed_funcs` list in a `VirtualMachine` object. - */ - std::unordered_map primitive_map; - /*! \brief The virtual machine's function table. */ - std::vector functions; - - private: - /*! 
- * \brief Save the globals. - * - * \param strm The input stream. - */ - void SaveGlobalSection(dmlc::Stream* strm); - - /*! - * \brief Save the constant pool. - * - * \param strm The input stream. - */ - void SaveConstantSection(dmlc::Stream* strm); - - /*! - * \brief Save primitive op names. - * - * \param strm The input stream. - */ - void SavePrimitiveOpNames(dmlc::Stream* strm); - - /*! - * \brief Save the vm functions. - * - * \param strm The input stream. - */ - void SaveCodeSection(dmlc::Stream* strm); - - /*! - * \brief Load the globals. - * - * \param strm The input stream. - */ - void LoadGlobalSection(dmlc::Stream* strm); - - /*! - * \brief Load the constant pool. - * - * \param strm The input stream. - */ - void LoadConstantSection(dmlc::Stream* strm); - - /*! - * \brief Load primitive op names. - * - * \param strm The input stream. - */ - void LoadPrimitiveOpNames(dmlc::Stream* strm); - - /*! - * \brief Load the vm functions. - * - * \param strm The input stream. - */ - void LoadCodeSection(dmlc::Stream* strm); - - /*! \brief The serialized bytecode. */ - std::string code_; -}; - -/*! - * \brief The virtual machine. - * - * The virtual machine contains all the current execution state, - * as well as the executable. - * - * The goal is to have a single self-contained object, - * enabling one to easily pass around VMs, execute them on - * multiple threads, or serialize them to disk or over the - * wire. - */ -class VirtualMachine : public runtime::ModuleNode { - public: - /*! - * \brief Get a PackedFunc from module. - * - * The PackedFunc may not be fully initialized, - * there might still be first time running overhead when - * executing the function on certain devices. - * For benchmarking, use prepare to eliminate - * - * \param name the name of the function. - * \param sptr_to_self The shared_ptr that points to this module node. - * - * \return PackedFunc(nullptr) when it is not available. - * - * \note The function will always remain valid. 
- * If the function needs resource from the module(e.g. late linking), - * it should capture sptr_to_self. - */ - virtual PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self); - - virtual ~VirtualMachine() {} - - const char* type_key() const final { return "VirtualMachine"; } - - VirtualMachine() : frames_(), func_index_(0), code_(nullptr), pc_(0), exec_(nullptr) {} - - /*! - * \brief load the executable for the virtual machine. - * \param exec The executable. - */ - virtual void LoadExecutable(const Executable* exec); - - protected: - /*! \brief The virtual machine's packed function table. */ - std::vector packed_funcs_; - /*! \brief The current stack of call frames. */ - std::vector frames_; - /*! \brief The fuction table index of the current function. */ - Index func_index_; - /*! \brief The current pointer to the code section. */ - const Instruction* code_; - /*! \brief The virtual machine PC. */ - Index pc_; - /*! \brief The special return register. */ - ObjectRef return_register_; - /*! \brief The executable the VM will operate on. */ - const Executable* exec_; - /*! \brief The function name to inputs mapping. */ - std::unordered_map> inputs_; - /*! \brief The set of TVM contexts the VM is currently executing on. */ - std::vector ctxs_; - - /*! \brief Push a call frame on to the call stack. */ - void PushFrame(Index arg_count, Index ret_pc, const VMFunction& vm_func); - - /*! - * \brief Pop a frame off the call stack. - * \return The number of frames left. - */ - Index PopFrame(); - - /*! - * \brief Write to a VM register. - * \param reg The register to write to. - * \param obj The object to write to. - */ - inline void WriteRegister(RegName reg, const ObjectRef& obj); - - /*! - * \brief Read a VM register. - * \param reg The register to read from. - * \return The read object. - */ - inline ObjectRef ReadRegister(RegName reg) const; - - /*! - * \brief Read a VM register and cast it to int32_t - * \param reg The register to read from. 
- * \return The read scalar. - */ - inline int64_t LoadScalarInt(RegName reg) const; - - /*! - * \brief Invoke a VM function. - * \param func The function. - * \param args The arguments to the function. - * \return The object representing the result. - */ - ObjectRef Invoke(const VMFunction& func, const std::vector& args); - - // TODO(@jroesch): I really would like this to be a global variable. - /*! - * \brief Invoke a VM function by name. - * \param name The function's name. - * \param args The arguments to the function. - * \return The object representing the result. - */ - ObjectRef Invoke(const std::string& name, const std::vector& args); - - /*! - * \brief Invoke a PackedFunction - * - * \param packed_index The offset of the PackedFunction in all functions. - * \param func The PackedFunction to be invoked. - * \param arg_count The number of arguments to the PackedFunction. - * \param output_size The number of outputs of the PackedFunction. - * \param args Arguments to the PackedFunction. - * - * \note The return value will be stored in the last output_size slots of args. - */ - virtual void InvokePacked(Index packed_index, const PackedFunc& func, Index arg_count, - Index output_size, const std::vector& args); - - /*! - * \brief Initialize the virtual machine for a set of contexts. - * \param contexts The set of TVM contexts. - */ - void Init(const std::vector& contexts); - - /*! \brief Run VM dispatch loop. */ - void RunLoop(); - - /*! \brief Get device context for params. */ - TVMContext GetParamsContext() const; - - private: - /*! - * \brief Invoke a global setting up the VM state to execute. - * - * This does not begin execution of the VM. - */ - void InvokeGlobal(const VMFunction& func, const std::vector& args); - - /*! - * \brief The constant pool for runtime. It caches the device dependent - * object to avoid rellocation of constants during inference. 
- */ - std::vector const_pool_; -}; - -} // namespace vm -} // namespace runtime -} // namespace tvm - -#endif // TVM_RUNTIME_VM_H_ diff --git a/include/tvm/runtime/vm/bytecode.h b/include/tvm/runtime/vm/bytecode.h new file mode 100644 index 0000000..89a3164 --- /dev/null +++ b/include/tvm/runtime/vm/bytecode.h @@ -0,0 +1,377 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file tvm/runtime/vm/bytecode.h + * \brief The bytecode for Relay virtual machine. + */ +#ifndef TVM_RUNTIME_VM_BYTECODE_H_ +#define TVM_RUNTIME_VM_BYTECODE_H_ + +#include + +#include +#include + +namespace tvm { +namespace runtime { +namespace vm { + +/*! \brief A register name. */ +using RegName = int64_t; + +/*! \brief An alias for the integer type used ubiquitously + * in the VM. + */ +using Index = int64_t; + +/*! \brief An enumeration of Relay's opcodes. + * + * The opcode is used to implement instruction + * as a tagged union. 
+ */ +enum class Opcode { + Move = 0U, + Ret = 1U, + Invoke = 2U, + InvokeClosure = 3U, + InvokePacked = 4U, + AllocTensor = 5U, + AllocTensorReg = 6U, + AllocADT = 7U, + AllocClosure = 8U, + GetField = 9U, + If = 10U, + LoadConst = 11U, + Goto = 12U, + GetTag = 13U, + LoadConsti = 14U, + Fatal = 15U, + AllocStorage = 16U, + ShapeOf = 17U, + ReshapeTensor = 18U, +}; + +/*! \brief A single virtual machine instruction. + * + * The representation of the instruction is as + * a tagged union. + * + * The first field represents which instruction, + * and by extension which field of the union + * is active. + */ +struct Instruction { + /*! \brief The instruction opcode. */ + Opcode op; + + /*! \brief The destination register. */ + RegName dst; + + union { + struct /* AllocTensor Operands */ { + /*! \brief The storage to allocate from. */ + RegName storage; + /*! \brief The offset into the storage to allocate from. */ + Index offset; + /*! \brief The number of dimensions. */ + uint32_t ndim; + /*! \brief The shape of tensor. */ + int64_t* shape; + /*! \brief The datatype of tensor to be allocated. */ + DLDataType dtype; + } alloc_tensor; + struct /* AllocTensorReg Operands */ { + /*! \brief The storage to allocate from. */ + RegName storage; + /*! \brief The offset into the storage to allocate from. */ + Index offset; + /*! \brief The register to read the shape out of. */ + RegName shape_register; + /*! \brief The datatype of tensor to be allocated. */ + DLDataType dtype; + } alloc_tensor_reg; + struct /* InvokeClosure Operands */ { + /*! \brief The register containing the closure. */ + RegName closure; + /*! \brief The number of arguments to the closure. */ + Index num_closure_args; + /*! \brief The closure arguments as an array. */ + RegName* closure_args; + }; + struct /* Return Operands */ { + /*! \brief The register to return. */ + RegName result; + }; + struct /* Move Operands */ { + /*! \brief The source register for a move operation. 
*/ + RegName from; + }; + struct /* InvokePacked Operands */ { + /*! \brief The index into the packed function table. */ + Index packed_index; + /*! \brief The arity of the packed function. */ + Index arity; + /*! \brief The number of outputs produced by the packed function. */ + Index output_size; + /*! \brief The arguments to pass to the packed function. */ + RegName* packed_args; + }; + struct /* If Operands */ { + /*! \brief The register containing the test value. */ + RegName test; + /*! \brief The register containing the target value. */ + RegName target; + /*! \brief The program counter offset for the true branch. */ + Index true_offset; + /*! \brief The program counter offset for the false branch. */ + Index false_offset; + } if_op; + struct /* Invoke Operands */ { + /*! \brief The function to call. */ + Index func_index; + /*! \brief The number of arguments to the function. */ + Index num_args; + /*! \brief The registers containing the arguments. */ + RegName* invoke_args_registers; + }; + struct /* LoadConst Operands */ { + /*! \brief The index into the constant pool. */ + Index const_index; + }; + struct /* LoadConsti Operands */ { + /*! \brief The integer constant value to load. */ + Index val; + } load_consti; + struct /* Jump Operands */ { + /*! \brief The jump offset. */ + Index pc_offset; + }; + struct /* Proj Operands */ { + /*! \brief The register to project from. */ + RegName object; + /*! \brief The field to read out. */ + Index field_index; + }; + struct /* GetTag Operands */ { + /*! \brief The register to project from. */ + RegName object; + } get_tag; + struct /* AllocADT Operands */ { + /*! \brief The datatype's constructor tag. */ + Index constructor_tag; + /*! \brief The number of fields to store in the datatype. */ + Index num_fields; + /*! \brief The fields as an array. */ + RegName* datatype_fields; + }; + struct /* AllocClosure Operands */ { + /*! \brief The index into the function table. */ + Index clo_index; + /*!
\brief The number of free variables to capture. */ + Index num_freevar; + /*! \brief The free variables as an array. */ + RegName* free_vars; + }; + struct /* AllocStorage Operands */ { + /*! \brief The size of the allocation. */ + RegName allocation_size; + /*! \brief The alignment of the allocation. */ + Index alignment; + /*! \brief The hint of the dtype. */ + DLDataType dtype_hint; + } alloc_storage; + struct /* ShapeOf Operands */ { + RegName tensor; + } shape_of; + struct /* ReshapeTensor Operands */ { + RegName tensor; + RegName newshape; + } reshape_tensor; + }; + + /*! + * \brief Construct a return instruction. + * \param return_reg The register containing the return value. + * \return The return instruction. + */ + static Instruction Ret(RegName return_reg); + /*! + * \brief Construct a fatal instruction. + * \return The fatal instruction. + */ + static Instruction Fatal(); + /*! + * \brief Construct a invoke packed instruction. + * \param packed_index The index of the packed function. + * \param arity The arity of the function. + * \param output_size The number of outputs of the packed function. + * \param args The argument registers. + * \return The invoke packed instruction. + */ + static Instruction InvokePacked(Index packed_index, Index arity, Index output_size, + const std::vector& args); + /*! + * \brief Construct an allocate tensor instruction with constant shape. + * \param storage The storage to allocate out of. + * \param offset The offset to allocate at. + * \param shape The shape of the tensor. + * \param dtype The dtype of the tensor. + * \param dst The destination register. + * \return The allocate tensor instruction. + */ + static Instruction AllocTensor(RegName storage, Index offset, const std::vector& shape, + DLDataType dtype, RegName dst); + /*! + * \brief Construct an allocate tensor instruction with register. + * \param storage The storage to allocate out of. + * \param offset The offset into the storage to allocate from. 
+ * \param shape_register The register containing the shape. + * \param dtype The dtype of the tensor. + * \param dst The destination register. + * \return The allocate tensor instruction. + */ + static Instruction AllocTensorReg(RegName storage, Index offset, RegName shape_register, + DLDataType dtype, RegName dst); + /*! + * \brief Construct an allocate datatype instruction. + * \param tag The datatype tag. + * \param num_fields The number of fields for the datatype. + * \param fields The registers containing the fields. + * \param dst The register name of the destination. + * \return The allocate instruction tensor. + */ + static Instruction AllocADT(Index tag, Index num_fields, const std::vector& fields, + RegName dst); + /*! + * \brief Construct an allocate closure instruction. + * \param func_index The index of the function table. + * \param num_freevar The number of free variables. + * \param free_vars The registers of the free variables. + * \param dst The destination register. + * \return The allocate closure instruction. + */ + static Instruction AllocClosure(Index func_index, Index num_freevar, + const std::vector& free_vars, RegName dst); + /*! + * \brief Construct a get field instruction. + * \param object_reg The register containing the object to project from. + * \param field_index The field to read out of the object. + * \param dst The destination register. + * \return The get field instruction. + */ + static Instruction GetField(RegName object_reg, Index field_index, RegName dst); + /*! + * \brief Construct a get_tag instruction. + * \param object_reg The register containing the object to project from. + * \param dst The destination register. + * \return The get_tag instruction. + */ + static Instruction GetTag(RegName object_reg, RegName dst); + /*! + * \brief Construct an if instruction. + * \param test The register containing the test value. + * \param target The register containing the target value. 
+ * \param true_branch The offset to the true branch. + * \param false_branch The offset to the false branch. + * \return The if instruction. + */ + static Instruction If(RegName test, RegName target, Index true_branch, Index false_branch); + /*! + * \brief Construct a goto instruction. + * \param pc_offset The offset from the current pc. + * \return The goto instruction. + */ + static Instruction Goto(Index pc_offset); + /*! + * \brief Construct an invoke instruction. + * \param func_index The index of the function to invoke. + * \param args The registers containing the arguments. + * \param dst The destination register. + * \return The invoke instruction. + */ + static Instruction Invoke(Index func_index, const std::vector& args, RegName dst); + /*! + * \brief Construct an invoke closure instruction. + * \param closure The register of the closure to invoke. + * \param args The registers containing the arguments. + * \param dst The destination register. + * \return The invoke closure instruction. + */ + static Instruction InvokeClosure(RegName closure, const std::vector& args, RegName dst); + /*! + * \brief Construct a load constant instruction. + * \param const_index The index of the constant. + * \param dst The destination register. + * \return The load constant instruction. + */ + static Instruction LoadConst(Index const_index, RegName dst); + /*! + * \brief Construct a load_constanti instruction. + * \param val The integer constant value. + * \param dst The destination register. + * \return The load_constanti instruction. + */ + static Instruction LoadConsti(Index val, RegName dst); + /*! + * \brief Construct a move instruction. + * \param src The source register. + * \param dst The destination register. + * \return The move instruction. + */ + static Instruction Move(RegName src, RegName dst); + /*! + * \brief Allocate a storage block. + * \param size The size of the allocation. + * \param alignment The allocation's alignment.
+ * \param dtype_hint The data type hint for the allocator. + * \param dst The destination to place the storage. + * \return The alloc storage instruction. + */ + static Instruction AllocStorage(RegName size, Index alignment, DLDataType dtype_hint, + RegName dst); + /*! + * \brief Get the shape of an input tensor. + * \param tensor The input tensor. + * \param dst The destination to store the shape of the given tensor. + * \return The shape of instruction. + */ + static Instruction ShapeOf(RegName tensor, RegName dst); + /*! + * \brief Reshape the tensor given the new shape. + * \param tensor The input tensor. + * \param newshape The shape tensor. + * \param dst The destination to store the output tensor with new shape. + * \return The reshape tensor instruction. + */ + static Instruction ReshapeTensor(RegName tensor, RegName newshape, RegName dst); + + Instruction(); + Instruction(const Instruction& instr); + Instruction& operator=(const Instruction& instr); + ~Instruction(); + + friend std::ostream& operator<<(std::ostream& os, const Instruction&); +}; + +} // namespace vm +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_VM_BYTECODE_H_ diff --git a/include/tvm/runtime/vm/executable.h b/include/tvm/runtime/vm/executable.h new file mode 100644 index 0000000..cc38da7 --- /dev/null +++ b/include/tvm/runtime/vm/executable.h @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file tvm/runtime/vm/executable.h + * \brief The Relay virtual machine executable. + */ +#ifndef TVM_RUNTIME_VM_EXECUTABLE_H_ +#define TVM_RUNTIME_VM_EXECUTABLE_H_ + +#include +#include +#include +#include + +#include +#include +#include + +namespace tvm { +namespace runtime { +namespace vm { + +struct VMFunction; + +/*! + * \brief The executable emitted by the VM compiler. + * + * The executable contains information (e.g. data in different memory regions) + * to run in a virtual machine. + * + * - Global section, containing all globals. + * - Constant section, storing the constant pool. + * - Primitive name section, containing the function name of the primitive ops + * used by the virtual machine. + * - Code section, handling the VM functions and bytecode. + */ +class Executable : public ModuleNode { + public: + /*! + * \brief Get a PackedFunc from an executable module. + * + * \param name the name of the function. + * \param sptr_to_self The shared_ptr that points to this module node. + * + * \return PackedFunc or nullptr when it is not available. + */ + PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) final; + + /*! + * \brief Serialize the executable into global section, constant section, and + * code section. + * + * \return The binary representation of the VM. + */ + TVMByteArray Save(); + + /*! + * \brief Load the saved VM executable. + * + * \param code The bytecode in string. + * \param lib The compiled runtime library. + * + * \return exe The constructed executable. 
+ */ + static runtime::Module Load(const std::string& code, const runtime::Module lib); + + /*! + * \brief Get the serialized form of the `functions`. This is + * essentially bytecode serialization. + * + * \return The serialized vm bytecode. + * + * \note The bytecode is in the following format: + * func_name reg_file_size num_instructions + * param1 param2 ... paramM + * instruction1 + * instruction2 + * ... + * instructionN + * + * Each instruction is printed in the following format: + * opcode num_fields field1 ... fieldX # The text format. + * + * Serializing an `Instruction` requires us to deal with the bytecode. Each line + * of the instructions could be serialized as the following format: + * hash, opcode, f1, f2, ..., fX, field with variable length + * 1. hash: the hash of the instruction. This number will be used to help us + * validate if an instruction is well-formed during deserialization. + * 2. opcode: the opcode code of the instruction. + * 3. f1, f2, ..., fX. These fields together represent the fixed fields in + * an instruction, e.g., `from` and `dst` fields of a `Move` instruction. For + * example, `DLDataType` will be unpacked into three fields (code, bits, lanes). + * 4. The rest of the line indicates the field with variable length, e.g., + * the shape of a tensor, the args used by an `InvokePacked` instruction, etc. + + * The field starting from # is only used for debugging. The serialized code + * doesn't contain it, therefore the deserializer doesn't need to handle it. + */ + std::string GetBytecode() const; + + /*! + * \brief Print the detailed statistics of the given code, i.e. number of + * globals and constants, etc. + */ + std::string Stats() const; + + /*! + * \brief Get the `lib` module in an executable. Users have the flexibility to call + * `export_library` from the frontend to save the library to disk. + * + * \return The runtime module that contains the hardware dependent code.
+ */ + runtime::Module GetLib() const { return lib; } + + /*! + * \brief Get the arity of the VM Function. + * \param func Function name. + * \return The number of parameters. + */ + int GetFunctionArity(std::string func) const; + + /*! + * \brief Get the parameter name given the function name and parameter index. + * \param func Function name. + * \param index Parameter index. + * \return The parameter name. + */ + std::string GetFunctionParameterName(std::string func, uint32_t index) const; + + virtual ~Executable() {} + + const char* type_key() const final { return "VMExecutable"; } + + /*! \brief The runtime module/library that contains both the host and also the device + * code when executing on non-CPU devices. */ + runtime::Module lib; + /*! \brief The global constant pool. */ + std::vector constants; + /*! \brief A map from globals (as strings) to their index in the function map. */ + std::unordered_map global_map; + /*! \brief A mapping from the packed function (as string) to the index that + * corresponds to the position of the `packed_funcs` list in a `VirtualMachine` object. + */ + std::unordered_map primitive_map; + /*! \brief The virtual machine's function table. */ + std::vector functions; + + private: + /*! + * \brief Save the globals. + * + * \param strm The input stream. + */ + void SaveGlobalSection(dmlc::Stream* strm); + + /*! + * \brief Save the constant pool. + * + * \param strm The input stream. + */ + void SaveConstantSection(dmlc::Stream* strm); + + /*! + * \brief Save primitive op names. + * + * \param strm The input stream. + */ + void SavePrimitiveOpNames(dmlc::Stream* strm); + + /*! + * \brief Save the vm functions. + * + * \param strm The input stream. + */ + void SaveCodeSection(dmlc::Stream* strm); + + /*! + * \brief Load the globals. + * + * \param strm The input stream. + */ + void LoadGlobalSection(dmlc::Stream* strm); + + /*! + * \brief Load the constant pool. + * + * \param strm The input stream.
+ */ + void LoadConstantSection(dmlc::Stream* strm); + + /*! + * \brief Load primitive op names. + * + * \param strm The input stream. + */ + void LoadPrimitiveOpNames(dmlc::Stream* strm); + + /*! + * \brief Load the vm functions. + * + * \param strm The input stream. + */ + void LoadCodeSection(dmlc::Stream* strm); + + /*! \brief The serialized bytecode. */ + std::string code_; +}; + +} // namespace vm +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_VM_EXECUTABLE_H_ diff --git a/src/runtime/vm/memory_manager.h b/include/tvm/runtime/vm/memory_manager.h similarity index 82% rename from src/runtime/vm/memory_manager.h rename to include/tvm/runtime/vm/memory_manager.h index f59d584..c983cb0 100644 --- a/src/runtime/vm/memory_manager.h +++ b/include/tvm/runtime/vm/memory_manager.h @@ -18,7 +18,7 @@ */ /*! - * \file src/runtime/memory_manager.h + * \file tvm/runtime/vm/memory_manager.h * \brief Abstract device memory management API */ #ifndef TVM_RUNTIME_VM_MEMORY_MANAGER_H_ @@ -64,17 +64,24 @@ struct Buffer { TVMContext ctx; }; +enum AllocatorType { + kNaive = 1, + kPooled, +}; + class Allocator { public: - Allocator() {} - + explicit Allocator(AllocatorType type) : type_(type) {} + virtual ~Allocator() = default; /*! \brief Allocate an empty NDArray using from the allocator. * \param shape The shape of the NDArray. - * \param alignment The datatype of the NDArray. + * \param dtype The datatype of the NDArray. * \param ctx The context where the array is allocated. * \return The empty NDArray. */ NDArray Empty(std::vector shape, DLDataType dtype, DLContext ctx); + /*! \brief Return the allocator type. */ + inline AllocatorType type() const { return type_; } /*! \brief Allocate a buffer given a size, alignment and type. * \param nbytes The size of the buffer. * \param alignment The alignment of the buffer. @@ -90,21 +97,34 @@ class Allocator { * \return The amount of memory currently allocated. 
*/ virtual size_t UsedMemory() const = 0; - virtual ~Allocator() = default; + + private: + AllocatorType type_; }; class MemoryManager { public: static MemoryManager* Global(); - - Allocator* GetAllocator(TVMContext ctx); + /*! + * \brief Get or create an allocator given the context and allocator type. + * \param ctx The TVM context + * \param type The allocator type + * \return The memory allocator. + */ + static Allocator* GetOrCreateAllocator(TVMContext ctx, AllocatorType type); + /*! + * \brief Get an allocator given the context. + * \param ctx The TVM context + * \return The memory allocator. + */ + static Allocator* GetAllocator(TVMContext ctx); private: MemoryManager() {} private: std::mutex mu_; - std::unordered_map > allocators_; + std::unordered_map> allocators_; }; /*! \brief An object representing a storage allocation. */ diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h new file mode 100644 index 0000000..273b8fe --- /dev/null +++ b/include/tvm/runtime/vm/vm.h @@ -0,0 +1,289 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file tvm/runtime/vm/vm.h + * \brief The Relay virtual machine runtime. 
+ */ +#ifndef TVM_RUNTIME_VM_VM_H_ +#define TVM_RUNTIME_VM_VM_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace tvm { +namespace runtime { +namespace vm { + +/*! + * \brief An object representing a vm closure. + */ +class VMClosureObj : public ClosureObj { + public: + /*! + * \brief The index into the function list. The function could be any + * function object that is compatible to the VM runtime. + */ + size_t func_index; + /*! \brief The free variables of the closure. */ + std::vector free_vars; + + static constexpr const uint32_t _type_index = TypeIndex::kDynamic; + static constexpr const char* _type_key = "vm.Closure"; + TVM_DECLARE_FINAL_OBJECT_INFO(VMClosureObj, ClosureObj); +}; + +/*! \brief reference to closure. */ +class VMClosure : public Closure { + public: + VMClosure(size_t func_index, std::vector free_vars); + TVM_DEFINE_OBJECT_REF_METHODS(VMClosure, Closure, VMClosureObj); +}; + +/*! + * \brief A representation of a Relay function in the VM. + * + * Contains metadata about the compiled function, as + * well as the compiled VM instructions. + */ +struct VMFunction { + /*! \brief The function's name. */ + std::string name; + /*! \brief The function parameter names. */ + std::vector params; + /*! \brief The instructions representing the function. */ + std::vector instructions; + /*! \brief The size of the frame for this function */ + Index register_file_size; + + VMFunction(const std::string& name, std::vector params, + const std::vector& instructions, Index register_file_size) + : name(name), + params(params), + instructions(instructions), + register_file_size(register_file_size) {} + + VMFunction() {} + + friend std::ostream& operator<<(std::ostream& os, const VMFunction&); +}; + +/*! + * \brief A representation of a stack frame. 
+ * + * A stack frame is a record containing the information needed + * to restore the caller's virtual machine state after returning + * from a function call. + */ +struct VMFrame { + /*! \brief The return program counter. */ + Index pc; + /*! \brief The index into the function table, points to the caller. */ + Index func_index; + /*! \brief The number of arguments. */ + Index args; + /*! \brief A pointer into the caller function's instructions. */ + const Instruction* code; + + /*! \brief Statically allocated space for objects */ + std::vector register_file; + + /*! \brief Register in caller's frame to put return value */ + RegName caller_return_register; + + VMFrame(Index pc, Index func_index, Index args, const Instruction* code, Index register_file_size) + : pc(pc), + func_index(func_index), + args(args), + code(code), + register_file(register_file_size), + caller_return_register(0) {} +}; + +/*! + * \brief The virtual machine. + * + * The virtual machine contains all the current execution state, + * as well as the executable. + * + * The goal is to have a single self-contained object, + * enabling one to easily pass around VMs, execute them on + * multiple threads, or serialize them to disk or over the + * wire. + */ +class VirtualMachine : public runtime::ModuleNode { + public: + /*! + * \brief Get a PackedFunc from module. + * + * The PackedFunc may not be fully initialized, + * there might still be first time running overhead when + * executing the function on certain devices. + * For benchmarking, use prepare to eliminate + * + * \param name the name of the function. + * \param sptr_to_self The shared_ptr that points to this module node. + * + * \return PackedFunc(nullptr) when it is not available. + * + * \note The function will always remain valid. + * If the function needs resource from the module(e.g. late linking), + * it should capture sptr_to_self. 
+ */ + virtual PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self); + + virtual ~VirtualMachine() {} + + const char* type_key() const final { return "VirtualMachine"; } + + VirtualMachine() : frames_(), func_index_(0), code_(nullptr), pc_(0), exec_(nullptr) {} + + /*! + * \brief Load the executable for the virtual machine. + * \param exec The executable. + */ + virtual void LoadExecutable(const Executable* exec); + + protected: + /*! \brief Push a call frame on to the call stack. */ + void PushFrame(Index arg_count, Index ret_pc, const VMFunction& vm_func); + + /*! + * \brief Pop a frame off the call stack. + * \return The number of frames left. + */ + Index PopFrame(); + + /*! + * \brief Write to a VM register. + * \param reg The register to write to. + * \param obj The object to write. + */ + inline void WriteRegister(RegName reg, const ObjectRef& obj); + + /*! + * \brief Read a VM register. + * \param reg The register to read from. + * \return The read object. + */ + inline ObjectRef ReadRegister(RegName reg) const; + + /*! + * \brief Read a VM register and return its value as an int64_t scalar. + * \param reg The register to read from. + * \return The read scalar. + */ + inline int64_t LoadScalarInt(RegName reg) const; + + /*! + * \brief Invoke a VM function. + * \param func The function. + * \param args The arguments to the function. + * \return The object representing the result. + */ + ObjectRef Invoke(const VMFunction& func, const std::vector& args); + + // TODO(@jroesch): I really would like this to be a global variable. + /*! + * \brief Invoke a VM function by name. + * \param name The function's name. + * \param args The arguments to the function. + * \return The object representing the result. + */ + ObjectRef Invoke(const std::string& name, const std::vector& args); + + /*! + * \brief Invoke a PackedFunction + * + * \param packed_index The offset of the PackedFunction in all functions. + * \param func The PackedFunction to be invoked.
+ * \param arg_count The number of arguments to the PackedFunction. + * \param output_size The number of outputs of the PackedFunction. + * \param args Arguments to the PackedFunction. + * + * \note The return value will be stored in the last output_size slots of args. + */ + virtual void InvokePacked(Index packed_index, const PackedFunc& func, Index arg_count, + Index output_size, const std::vector& args); + + /*! + * \brief Initialize the virtual machine for a set of contexts. + * \param contexts The set of TVM contexts. + * \param alloc_types The allocator types for each context. + */ + void Init(const std::vector& contexts, const std::vector& alloc_types); + + /*! \brief Run VM dispatch loop. */ + void RunLoop(); + + /*! \brief Get device context for params. */ + TVMContext GetParamsContext() const; + + /*! + * \brief Invoke a global setting up the VM state to execute. + * + * This does not begin execution of the VM. + */ + void InvokeGlobal(const VMFunction& func, const std::vector& args); + + protected: + /*! \brief The virtual machine's packed function table. */ + std::vector packed_funcs_; + /*! \brief The current stack of call frames. */ + std::vector frames_; + /*! \brief The function table index of the current function. */ + Index func_index_; + /*! \brief The current pointer to the code section. */ + const Instruction* code_; + /*! \brief The virtual machine PC. */ + Index pc_; + /*! \brief The special return register. */ + ObjectRef return_register_; + /*! \brief The executable the VM will operate on. */ + const Executable* exec_; + /*! \brief The function name to inputs mapping. */ + std::unordered_map> inputs_; + /*! \brief The set of TVM contexts the VM is currently executing on. */ + std::vector ctxs_; + /*! \brief The mapping from TVM context to memory allocator. */ + std::unordered_map allocators_; + /*! + * \brief The constant pool for runtime. It caches the device dependent + * object to avoid reallocation of constants during inference.
+ */ + std::vector const_pool_; +}; + +} // namespace vm +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_VM_VM_H_ diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py index a7bfb32..dcc9528 100644 --- a/python/tvm/_ffi/runtime_ctypes.py +++ b/python/tvm/_ffi/runtime_ctypes.py @@ -274,6 +274,9 @@ class TVMContext(ctypes.Structure): def __ne__(self, other): return not self.__eq__(other) + def __hash__(self): + return hash(str(self)) + def __repr__(self): if self.device_type >= RPC_SESS_MASK: tbl_id = self.device_type / RPC_SESS_MASK - 1 diff --git a/python/tvm/relay/backend/vm.py b/python/tvm/relay/backend/vm.py index 16d4724..cb7761b 100644 --- a/python/tvm/relay/backend/vm.py +++ b/python/tvm/relay/backend/vm.py @@ -248,8 +248,7 @@ class VMExecutor(Executor): self.ctx = ctx self.target = target self.executable = compile(mod, target) - self.vm = vm_rt.VirtualMachine(self.executable) - self.vm.init(ctx) + self.vm = vm_rt.VirtualMachine(self.executable, ctx) def _make_executor(self, expr=None): main = self.mod["main"] diff --git a/python/tvm/runtime/profiler_vm.py b/python/tvm/runtime/profiler_vm.py index 9d60483..5df10e5 100644 --- a/python/tvm/runtime/profiler_vm.py +++ b/python/tvm/runtime/profiler_vm.py @@ -32,15 +32,15 @@ def enabled(): class VirtualMachineProfiler(vm.VirtualMachine): """Relay profile VM runtime.""" - def __init__(self, mod): - super(VirtualMachineProfiler, self).__init__(mod) - m = mod.module if isinstance(mod, vm.Executable) else mod - self.mod = _ffi_api._VirtualMachineDebug(m) - self._init = self.mod["init"] - self._invoke = self.mod["invoke"] - self._get_stat = self.mod["get_stat"] - self._set_input = self.mod["set_input"] - self._reset = self.mod["reset"] + def __init__(self, exe, ctx, memory_cfg=None): + super(VirtualMachineProfiler, self).__init__(exe, ctx, memory_cfg) + self.module = _ffi_api._VirtualMachineDebug(exe.module) + self._init = self.module["init"] + self._invoke = 
class VirtualMachine(object):
    """Relay VM runtime.

    Parameters
    ----------
    exe : Executable
        The VM executable.

    ctx : tvm.runtime.TVMContext or List[tvm.runtime.TVMContext]
        The context to deploy the module

    memory_cfg : str or Dict[tvm.runtime.TVMContext, str], optional
        Config the type of memory allocator. The allocator type can be ["naive",
        "pooled"]. If memory_cfg is None, all contexts will use pooled allocator
        by default. If memory_cfg is string, all contexts will use the specified
        allocator type. If memory_cfg is a dict, each context uses the allocator
        type specified in the dict, or pooled allocator if not specified in the
        dict.
    """

    # Integer allocator codes handed to the runtime "init" function.
    NAIVE_ALLOCATOR = 1
    POOLED_ALLOCATOR = 2

    def __init__(self, exe, ctx, memory_cfg=None):
        if not isinstance(exe, Executable):
            raise TypeError("exe is expected to be the type of Executable, " +
                            "but received {}".format(type(exe)))
        self.module = _ffi_api._VirtualMachine(exe.module)
        self._exec = exe
        self._init = self.module["init"]
        self._invoke = self.module["invoke"]
        self._set_input = self.module["set_input"]
        self._setup_ctx(ctx, memory_cfg)

    def _setup_ctx(self, ctx, memory_cfg):
        """Init context and allocators.

        Parameters
        ----------
        ctx : tvm.runtime.TVMContext or List[tvm.runtime.TVMContext]
            One context or a list of contexts to initialize.

        memory_cfg : None, str or Dict[tvm.runtime.TVMContext, str]
            Allocator selection, see the class docstring.

        Raises
        ------
        TypeError
            If memory_cfg is neither None, a string nor a dictionary.
        ValueError
            If a dictionary entry names an unknown allocator.
        """
        if isinstance(ctx, tvm.runtime.TVMContext):
            ctx = [ctx]
        alloc_codes = {
            "naive": VirtualMachine.NAIVE_ALLOCATOR,
            "pooled": VirtualMachine.POOLED_ALLOCATOR,
        }
        default_alloc_type = VirtualMachine.POOLED_ALLOCATOR
        if memory_cfg is None:
            memory_cfg = {}
        elif isinstance(memory_cfg, str):
            assert memory_cfg in ["naive", "pooled"]
            default_alloc_type = alloc_codes[memory_cfg]
            memory_cfg = {}
        elif not isinstance(memory_cfg, dict):
            raise TypeError("memory_cfg is expected to be string or dictionary, " +
                            "but received {}".format(type(memory_cfg)))
        init_args = []
        for context in ctx:
            init_args.append(context.device_type)
            init_args.append(context.device_id)
            alloc_type = memory_cfg.get(context, default_alloc_type)
            if isinstance(alloc_type, str):
                # Dict values are allocator names; translate them to the same
                # integer codes the string/default paths already pass.
                if alloc_type not in alloc_codes:
                    raise ValueError("Unknown allocator type {!r} for {}".format(
                        alloc_type, context))
                alloc_type = alloc_codes[alloc_type]
            # Non-string values (e.g. the integer constants) pass through unchanged.
            init_args.append(alloc_type)
        self._init(*init_args)
diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index b57c0eb..1392798 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -27,7 +27,6 @@ #include #include #include -#include #include diff --git a/src/relay/backend/vm/compiler.cc b/src/relay/backend/vm/compiler.cc index ab11c6c..b811911 100644 --- a/src/relay/backend/vm/compiler.cc +++ b/src/relay/backend/vm/compiler.cc @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/relay/backend/vm/compiler.h b/src/relay/backend/vm/compiler.h index 8b1df7f..d1e1f7e 100644 --- a/src/relay/backend/vm/compiler.h +++ b/src/relay/backend/vm/compiler.h @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/relay/backend/vm/inline_primitives.cc b/src/relay/backend/vm/inline_primitives.cc index cf4f533..650df99 100644 --- a/src/relay/backend/vm/inline_primitives.cc +++ b/src/relay/backend/vm/inline_primitives.cc @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/src/relay/backend/vm/lambda_lift.cc b/src/relay/backend/vm/lambda_lift.cc index 011c7d2..22b8364 100644 --- a/src/relay/backend/vm/lambda_lift.cc +++ b/src/relay/backend/vm/lambda_lift.cc @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/src/relay/backend/vm/removed_unused_funcs.cc b/src/relay/backend/vm/removed_unused_funcs.cc index 4e8713b..cdf898f 100644 --- a/src/relay/backend/vm/removed_unused_funcs.cc +++ b/src/relay/backend/vm/removed_unused_funcs.cc @@ -26,7 +26,6 @@ #include #include #include -#include #include #include diff --git a/src/runtime/container.cc b/src/runtime/container.cc index 62220a8..2532432 100644 --- a/src/runtime/container.cc +++ b/src/runtime/container.cc @@ -25,13 +25,10 @@ #include #include #include -#include namespace tvm { namespace runtime { -using namespace vm; - 
TVM_REGISTER_GLOBAL("runtime.GetADTTag").set_body([](TVMArgs args, TVMRetValue* rv) { ObjectRef obj = args[0]; const auto& adt = Downcast(obj); diff --git a/src/runtime/vm/bytecode.cc b/src/runtime/vm/bytecode.cc new file mode 100644 index 0000000..edfd3ac --- /dev/null +++ b/src/runtime/vm/bytecode.cc @@ -0,0 +1,610 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/vm/bytecode.cc + * \brief The bytecode for Relay virtual machine. 
+ */ + +#include +#include +#include + +#include + +namespace tvm { +namespace runtime { +namespace vm { + +Instruction::Instruction() {} + +template +static T* Duplicate(T* src, Index size) { + auto dst = new T[size]; + std::copy(src, src + size, dst); + return dst; +} + +Instruction::Instruction(const Instruction& instr) { + this->op = instr.op; + this->dst = instr.dst; + + switch (instr.op) { + case Opcode::Move: + this->from = instr.from; + return; + case Opcode::Fatal: + return; + case Opcode::Ret: + this->result = instr.result; + return; + case Opcode::AllocTensor: + this->alloc_tensor.storage = instr.alloc_tensor.storage; + this->alloc_tensor.offset = instr.alloc_tensor.offset; + this->alloc_tensor.ndim = instr.alloc_tensor.ndim; + this->alloc_tensor.shape = + Duplicate(instr.alloc_tensor.shape, instr.alloc_tensor.ndim); + this->alloc_tensor.dtype = instr.alloc_tensor.dtype; + return; + case Opcode::AllocTensorReg: + this->alloc_tensor_reg.storage = instr.alloc_tensor_reg.storage; + this->alloc_tensor_reg.offset = instr.alloc_tensor_reg.offset; + this->alloc_tensor_reg.shape_register = instr.alloc_tensor_reg.shape_register; + this->alloc_tensor_reg.dtype = instr.alloc_tensor_reg.dtype; + return; + case Opcode::AllocADT: + this->constructor_tag = instr.constructor_tag; + this->num_fields = instr.num_fields; + this->datatype_fields = Duplicate(instr.datatype_fields, instr.num_fields); + return; + case Opcode::AllocClosure: + this->clo_index = instr.clo_index; + this->num_freevar = instr.num_freevar; + this->free_vars = Duplicate(instr.free_vars, instr.num_freevar); + return; + case Opcode::InvokePacked: + this->packed_index = instr.packed_index; + this->arity = instr.arity; + this->output_size = instr.output_size; + this->packed_args = Duplicate(instr.packed_args, instr.arity); + return; + case Opcode::InvokeClosure: + this->closure = instr.closure; + this->num_closure_args = instr.num_closure_args; + this->closure_args = Duplicate(instr.closure_args, 
instr.num_closure_args); + return; + case Opcode::Invoke: + this->func_index = instr.func_index; + this->num_args = instr.num_args; + this->invoke_args_registers = Duplicate(instr.invoke_args_registers, instr.num_args); + return; + case Opcode::If: + this->if_op = instr.if_op; + return; + case Opcode::LoadConst: + this->const_index = instr.const_index; + return; + case Opcode::LoadConsti: + this->load_consti = instr.load_consti; + return; + case Opcode::GetField: + this->object = instr.object; + this->field_index = instr.field_index; + return; + case Opcode::GetTag: + this->get_tag = instr.get_tag; + return; + case Opcode::Goto: + this->pc_offset = instr.pc_offset; + return; + case Opcode::AllocStorage: + this->alloc_storage = instr.alloc_storage; + return; + case Opcode::ShapeOf: + this->shape_of.tensor = instr.shape_of.tensor; + return; + case Opcode::ReshapeTensor: + this->reshape_tensor.tensor = instr.reshape_tensor.tensor; + this->reshape_tensor.newshape = instr.reshape_tensor.newshape; + return; + default: + std::ostringstream out; + out << "Invalid instruction " << static_cast(instr.op); + throw std::runtime_error(out.str()); + } +} + +template +static inline void FreeIf(T* t) { + if (t != nullptr) { + delete t; + } +} + +Instruction& Instruction::operator=(const Instruction& instr) { + this->op = instr.op; + this->dst = instr.dst; + + switch (instr.op) { + case Opcode::Move: + this->from = instr.from; + return *this; + case Opcode::Fatal: + return *this; + case Opcode::LoadConsti: + this->load_consti = instr.load_consti; + return *this; + case Opcode::Ret: + this->result = instr.result; + return *this; + case Opcode::AllocTensor: + this->alloc_tensor.storage = this->alloc_tensor.storage; + this->alloc_tensor.offset = instr.alloc_tensor.offset; + this->alloc_tensor.ndim = instr.alloc_tensor.ndim; + this->alloc_tensor.shape = + Duplicate(instr.alloc_tensor.shape, instr.alloc_tensor.ndim); + this->alloc_tensor.dtype = instr.alloc_tensor.dtype; + return *this; 
+ case Opcode::AllocTensorReg: + this->alloc_tensor_reg.storage = instr.alloc_tensor_reg.storage; + this->alloc_tensor_reg.offset = instr.alloc_tensor_reg.offset; + this->alloc_tensor_reg.shape_register = instr.alloc_tensor_reg.shape_register; + this->alloc_tensor_reg.dtype = instr.alloc_tensor_reg.dtype; + return *this; + case Opcode::AllocADT: + this->constructor_tag = instr.constructor_tag; + this->num_fields = instr.num_fields; + FreeIf(this->datatype_fields); + this->datatype_fields = Duplicate(instr.datatype_fields, instr.num_fields); + return *this; + case Opcode::AllocClosure: + this->clo_index = instr.clo_index; + this->num_freevar = instr.num_freevar; + FreeIf(this->free_vars); + this->free_vars = Duplicate(instr.free_vars, instr.num_freevar); + return *this; + case Opcode::InvokePacked: + this->packed_index = instr.packed_index; + this->arity = instr.arity; + this->output_size = instr.output_size; + FreeIf(this->packed_args); + this->packed_args = Duplicate(instr.packed_args, instr.arity); + return *this; + case Opcode::InvokeClosure: + this->closure = instr.closure; + this->num_closure_args = instr.num_closure_args; + FreeIf(this->closure_args); + this->closure_args = Duplicate(instr.closure_args, instr.num_closure_args); + return *this; + case Opcode::Invoke: + this->func_index = instr.func_index; + this->num_args = instr.num_args; + FreeIf(this->invoke_args_registers); + this->invoke_args_registers = Duplicate(instr.invoke_args_registers, instr.num_args); + return *this; + case Opcode::If: + this->if_op = instr.if_op; + return *this; + case Opcode::LoadConst: + this->const_index = instr.const_index; + return *this; + case Opcode::GetField: + this->object = instr.object; + this->field_index = instr.field_index; + return *this; + case Opcode::GetTag: + this->get_tag = instr.get_tag; + return *this; + case Opcode::Goto: + this->pc_offset = instr.pc_offset; + return *this; + case Opcode::AllocStorage: + this->alloc_storage = instr.alloc_storage; + return 
*this; + case Opcode::ShapeOf: + this->shape_of.tensor = instr.shape_of.tensor; + return *this; + default: + std::ostringstream out; + out << "Invalid instruction " << static_cast(instr.op); + throw std::runtime_error(out.str()); + } +} + +Instruction::~Instruction() { + switch (this->op) { + case Opcode::Move: + case Opcode::Ret: + case Opcode::AllocTensorReg: + case Opcode::If: + case Opcode::LoadConst: + case Opcode::GetField: + case Opcode::GetTag: + case Opcode::Goto: + case Opcode::LoadConsti: + case Opcode::AllocStorage: + case Opcode::ShapeOf: + case Opcode::ReshapeTensor: + case Opcode::Fatal: + return; + case Opcode::AllocTensor: + delete[] this->alloc_tensor.shape; + return; + case Opcode::AllocADT: + delete[] this->datatype_fields; + return; + case Opcode::AllocClosure: + delete[] this->free_vars; + return; + case Opcode::InvokePacked: + delete[] this->packed_args; + return; + case Opcode::InvokeClosure: + delete[] this->closure_args; + return; + case Opcode::Invoke: + delete[] this->invoke_args_registers; + return; + default: + std::ostringstream out; + LOG(FATAL) << "Invalid instruction " << static_cast(this->op); + } +} + +Instruction Instruction::Ret(RegName result) { + Instruction instr; + instr.op = Opcode::Ret; + instr.result = result; + return instr; +} + +Instruction Instruction::Fatal() { + Instruction instr; + instr.op = Opcode::Fatal; + return instr; +} + +Instruction Instruction::InvokePacked(Index packed_index, Index arity, Index output_size, + const std::vector& args) { + Instruction instr; + instr.op = Opcode::InvokePacked; + instr.packed_index = packed_index; + instr.arity = arity; + instr.output_size = output_size; + instr.packed_args = new RegName[arity]; + for (Index i = 0; i < arity; ++i) { + instr.packed_args[i] = args[i]; + } + return instr; +} + +Instruction Instruction::AllocTensor(RegName storage, RegName offset, + const std::vector& shape, DLDataType dtype, + RegName dst) { + Instruction instr; + instr.op = 
Opcode::AllocTensor; + instr.dst = dst; + instr.alloc_tensor.storage = storage; + instr.alloc_tensor.offset = offset; + instr.alloc_tensor.ndim = shape.size(); + instr.alloc_tensor.shape = new int64_t[shape.size()]; + for (size_t i = 0; i < shape.size(); ++i) { + instr.alloc_tensor.shape[i] = shape[i]; + } + instr.alloc_tensor.dtype = dtype; + return instr; +} + +Instruction Instruction::AllocTensorReg(RegName storage, RegName offset, RegName shape_register, + DLDataType dtype, RegName dst) { + Instruction instr; + instr.op = Opcode::AllocTensorReg; + instr.dst = dst; + instr.alloc_tensor_reg.storage = storage; + instr.alloc_tensor_reg.offset = offset; + instr.alloc_tensor_reg.shape_register = shape_register; + instr.alloc_tensor_reg.dtype = dtype; + return instr; +} + +Instruction Instruction::AllocStorage(RegName size, Index alignment, DLDataType dtype_hint, + RegName dst) { + Instruction instr; + instr.op = Opcode::AllocStorage; + instr.dst = dst; + instr.alloc_storage.allocation_size = size; + instr.alloc_storage.alignment = alignment; + instr.alloc_storage.dtype_hint = dtype_hint; + return instr; +} + +Instruction Instruction::ShapeOf(RegName tensor, RegName dst) { + Instruction instr; + instr.op = Opcode::ShapeOf; + instr.dst = dst; + instr.shape_of.tensor = tensor; + return instr; +} + +Instruction Instruction::ReshapeTensor(RegName tensor, RegName newshape, RegName dst) { + Instruction instr; + instr.op = Opcode::ReshapeTensor; + instr.dst = dst; + instr.reshape_tensor.tensor = tensor; + instr.reshape_tensor.newshape = newshape; + return instr; +} + +Instruction Instruction::AllocADT(Index tag, Index num_fields, + const std::vector& datatype_fields, RegName dst) { + Instruction instr; + instr.op = Opcode::AllocADT; + instr.dst = dst; + instr.constructor_tag = tag; + instr.num_fields = num_fields; + instr.datatype_fields = new RegName[num_fields]; + for (Index i = 0; i < num_fields; ++i) { + instr.datatype_fields[i] = datatype_fields[i]; + } + return instr; 
+} + +Instruction Instruction::AllocClosure(Index func_index, Index free_vars, + const std::vector& free_var_register, RegName dst) { + Instruction instr; + instr.op = Opcode::AllocClosure; + instr.dst = dst; + instr.clo_index = func_index; + instr.num_freevar = free_vars; + instr.free_vars = new RegName[instr.num_freevar]; + for (Index i = 0; i < instr.num_freevar; ++i) { + instr.free_vars[i] = free_var_register[i]; + } + return instr; +} + +Instruction Instruction::GetField(RegName object, Index field_index, RegName dst) { + Instruction instr; + instr.op = Opcode::GetField; + instr.dst = dst; + instr.object = object; + instr.field_index = field_index; + return instr; +} + +Instruction Instruction::GetTag(RegName object, RegName dst) { + Instruction instr; + instr.op = Opcode::GetTag; + instr.dst = dst; + instr.get_tag.object = object; + return instr; +} + +Instruction Instruction::If(RegName test, RegName target, Index true_branch, Index false_branch) { + Instruction instr; + instr.op = Opcode::If; + instr.if_op.test = test; + instr.if_op.target = target; + instr.if_op.true_offset = true_branch; + instr.if_op.false_offset = false_branch; + return instr; +} + +Instruction Instruction::Goto(Index pc_offset) { + Instruction instr; + instr.op = Opcode::Goto; + instr.pc_offset = pc_offset; + return instr; +} + +Instruction Instruction::Invoke(Index func_index, const std::vector& args_registers, + RegName dst) { + Instruction instr; + instr.op = Opcode::Invoke; + instr.dst = dst; + instr.func_index = func_index; + instr.num_args = args_registers.size(); + instr.invoke_args_registers = new RegName[instr.num_args]; + for (Index i = 0; i < instr.num_args; ++i) { + instr.invoke_args_registers[i] = args_registers[i]; + } + return instr; +} + +Instruction Instruction::InvokeClosure(RegName closure, const std::vector& args, + RegName dst) { + Instruction instr; + instr.op = Opcode::InvokeClosure; + instr.dst = dst; + instr.closure = closure; + instr.num_closure_args = 
args.size(); + instr.closure_args = new RegName[args.size()]; + for (size_t i = 0; i < args.size(); ++i) { + instr.closure_args[i] = args[i]; + } + return instr; +} + +Instruction Instruction::LoadConst(Index const_index, RegName dst) { + Instruction instr; + instr.op = Opcode::LoadConst; + instr.dst = dst; + instr.const_index = const_index; + return instr; +} + +Instruction Instruction::LoadConsti(Index val, RegName dst) { + Instruction instr; + instr.op = Opcode::LoadConsti; + instr.dst = dst; + instr.load_consti.val = val; + return instr; +} + +Instruction Instruction::Move(RegName src, RegName dst) { + Instruction instr; + instr.op = Opcode::Move; + instr.dst = dst; + instr.from = src; + return instr; +} + +void DLDatatypePrint(std::ostream& os, const DLDataType& dtype) { + switch (dtype.code) { + case kDLInt: + os << "int"; + break; + case kDLUInt: + os << "uint"; + break; + case kDLFloat: + os << "float"; + break; + } + + os << int(dtype.bits); + if (dtype.lanes != 1) { + os << "x" << dtype.lanes; + } +} + +template +std::string StrJoin(T* items, int offset, int cnt, std::string delim = ", ") { + if (cnt == 0) { + return ""; + } + std::ostringstream oss; + oss << items[offset]; + for (int i = 1; i < cnt; ++i) { + oss << delim << items[offset + i]; + } + return oss.str(); +} + +void InstructionPrint(std::ostream& os, const Instruction& instr) { + switch (instr.op) { + case Opcode::Move: { + os << "move $" << instr.dst << " $" << instr.from; + break; + } + case Opcode::Ret: { + os << "ret $" << instr.result; + break; + } + case Opcode::Fatal: { + os << "fatal"; + break; + } + case Opcode::InvokePacked: { + os << "invoke_packed PackedFunc[" << instr.packed_index << "] (in: $" + << StrJoin(instr.packed_args, 0, instr.arity - instr.output_size, ", $") + << ", out: $" + << StrJoin(instr.packed_args, instr.arity - instr.output_size, instr.output_size, + ", $") + << ")"; + break; + } + case Opcode::AllocTensor: { + os << "alloc_tensor $" << instr.dst << " $" << 
instr.alloc_tensor.storage << " $" + << instr.alloc_tensor.offset << " [" + << StrJoin(instr.alloc_tensor.shape, 0, instr.alloc_tensor.ndim) << "] "; + DLDatatypePrint(os, instr.alloc_tensor.dtype); + break; + } + case Opcode::AllocTensorReg: { + os << "alloc_tensor_reg $" << instr.dst << " $" << instr.alloc_tensor_reg.storage << " $" + << instr.alloc_tensor_reg.offset << " $" << instr.alloc_tensor_reg.shape_register << " "; + DLDatatypePrint(os, instr.alloc_tensor_reg.dtype); + break; + } + case Opcode::AllocADT: { + os << "alloc_data $" << instr.dst << " tag(" << instr.constructor_tag << ") [$" + << StrJoin(instr.datatype_fields, 0, instr.num_fields, ",$") << "]"; + break; + } + case Opcode::AllocClosure: { + os << "alloc_closure $" << instr.dst << " VMFunc[" << instr.clo_index << "]($" + << StrJoin(instr.free_vars, 0, instr.num_freevar, ",$") << ")"; + break; + } + case Opcode::If: { + os << "if " + << "$" << instr.if_op.test << " $" << instr.if_op.target << " " << instr.if_op.true_offset + << " " << instr.if_op.false_offset; + break; + } + case Opcode::Invoke: { + os << "invoke $" << instr.dst << " VMFunc[" << instr.func_index << "]($" + << StrJoin(instr.invoke_args_registers, 0, instr.num_args, ",$") << ")"; + break; + } + case Opcode::InvokeClosure: { + os << "invoke_closure $" << instr.dst << " $" << instr.closure << "($" + << StrJoin(instr.closure_args, 0, instr.num_closure_args, ",$") << ")"; + break; + } + case Opcode::LoadConst: { + os << "load_const $" << instr.dst << " Const[" << instr.const_index << "]"; + break; + } + case Opcode::LoadConsti: { + os << "load_consti $" << instr.dst << " " << instr.load_consti.val; + break; + } + case Opcode::GetField: { + os << "get_field $" << instr.dst << " $" << instr.object << "[" << instr.field_index << "]"; + break; + } + case Opcode::GetTag: { + os << "get_tag $" << instr.dst << " $" << instr.get_tag.object; + break; + } + case Opcode::Goto: { + os << "goto " << instr.pc_offset; + break; + } + case 
Opcode::AllocStorage: { + os << "alloc_storage $" << instr.dst << " $" << instr.alloc_storage.allocation_size << " " + << instr.alloc_storage.alignment << " " + << DLDataType2String(instr.alloc_storage.dtype_hint); + break; + } + case Opcode::ShapeOf: { + os << "shape_of $" << instr.dst << " $" << instr.shape_of.tensor; + break; + } + case Opcode::ReshapeTensor: { + os << "reshape_tensor $" << instr.dst << " $" << instr.reshape_tensor.tensor << " $" + << instr.reshape_tensor.newshape; + break; + } + default: + LOG(FATAL) << "should never hit this case" << static_cast(instr.op); + break; + } +} + +std::ostream& operator<<(std::ostream& os, const Instruction& instr) { + InstructionPrint(os, instr); + return os; +} + +} // namespace vm +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/vm/executable.cc b/src/runtime/vm/executable.cc index 4944778..9987621 100644 --- a/src/runtime/vm/executable.cc +++ b/src/runtime/vm/executable.cc @@ -25,7 +25,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/src/runtime/vm/memory_manager.cc b/src/runtime/vm/memory_manager.cc index 4c220bb..4d443d9 100644 --- a/src/runtime/vm/memory_manager.cc +++ b/src/runtime/vm/memory_manager.cc @@ -21,7 +21,7 @@ * \file tvm/runtime/vm/memory_manager.cc * \brief Allocate and manage memory for the runtime. 
*/ -#include "memory_manager.h" +#include #include #include @@ -37,7 +37,7 @@ static void BufferDeleter(Object* obj) { auto* ptr = static_cast(obj); CHECK(ptr->manager_ctx != nullptr); Buffer* buffer = reinterpret_cast(ptr->manager_ctx); - MemoryManager::Global()->GetAllocator(buffer->ctx)->Free(*(buffer)); + MemoryManager::GetAllocator(buffer->ctx)->Free(*(buffer)); delete buffer; delete ptr; } @@ -114,15 +114,49 @@ MemoryManager* MemoryManager::Global() { return &memory_manager; } +Allocator* MemoryManager::GetOrCreateAllocator(TVMContext ctx, AllocatorType type) { + MemoryManager* m = MemoryManager::Global(); + std::lock_guard lock(m->mu_); + if (m->allocators_.find(ctx) == m->allocators_.end()) { + std::unique_ptr alloc; + switch (type) { + case kNaive: { + DLOG(INFO) << "New naive allocator for " << DeviceName(ctx.device_type) << "(" + << ctx.device_id << ")"; + alloc.reset(new NaiveAllocator(ctx)); + break; + } + case kPooled: { + DLOG(INFO) << "New pooled allocator for " << DeviceName(ctx.device_type) << "(" + << ctx.device_id << ")"; + alloc.reset(new PooledAllocator(ctx)); + break; + } + default: + LOG(FATAL) << "Unknown allocator type: " << type; + } + auto ret = alloc.get(); + m->allocators_.emplace(ctx, std::move(alloc)); + return ret; + } + auto alloc = m->allocators_.at(ctx).get(); + if (alloc->type() != type) { + LOG(WARNING) << "The type of existing allocator for " << DeviceName(ctx.device_type) << "(" + << ctx.device_id << ") is different from the request type (" << alloc->type() + << " vs " << type << ")"; + } + return alloc; +} + Allocator* MemoryManager::GetAllocator(TVMContext ctx) { - std::lock_guard lock(mu_); - if (allocators_.find(ctx) == allocators_.end()) { - DLOG(INFO) << "New allocator for " << DeviceName(ctx.device_type) << "(" << ctx.device_id - << ")"; - std::unique_ptr alloc(new NaiveAllocator(ctx)); - allocators_.emplace(ctx, std::move(alloc)); + MemoryManager* m = MemoryManager::Global(); + std::lock_guard lock(m->mu_); + auto it 
= m->allocators_.find(ctx); + if (it == m->allocators_.end()) { + LOG(FATAL) << "Allocator for " << DeviceName(ctx.device_type) << "(" << ctx.device_id + << ") has not been created yet."; } - return allocators_.at(ctx).get(); + return it->second.get(); } NDArray Allocator::Empty(std::vector shape, DLDataType dtype, DLContext ctx) { diff --git a/src/runtime/vm/naive_allocator.h b/src/runtime/vm/naive_allocator.h index 5ac2ca6..301acf8 100644 --- a/src/runtime/vm/naive_allocator.h +++ b/src/runtime/vm/naive_allocator.h @@ -24,18 +24,17 @@ #define TVM_RUNTIME_VM_NAIVE_ALLOCATOR_H_ #include +#include #include -#include "memory_manager.h" - namespace tvm { namespace runtime { namespace vm { class NaiveAllocator final : public Allocator { public: - explicit NaiveAllocator(TVMContext ctx) : Allocator(), used_memory_(0), ctx_(ctx) {} + explicit NaiveAllocator(TVMContext ctx) : Allocator(kNaive), used_memory_(0), ctx_(ctx) {} Buffer Alloc(size_t nbytes, size_t alignment, DLDataType type_hint) override { Buffer buf; diff --git a/src/runtime/vm/pooled_allocator.h b/src/runtime/vm/pooled_allocator.h index e09628f..4226ef7 100644 --- a/src/runtime/vm/pooled_allocator.h +++ b/src/runtime/vm/pooled_allocator.h @@ -24,14 +24,13 @@ #define TVM_RUNTIME_VM_POOLED_ALLOCATOR_H_ #include +#include #include #include #include #include -#include "memory_manager.h" - namespace tvm { namespace runtime { namespace vm { @@ -41,7 +40,7 @@ class PooledAllocator final : public Allocator { static constexpr size_t kDefaultPageSize = 4096; explicit PooledAllocator(TVMContext ctx, size_t page_size = kDefaultPageSize) - : Allocator(), page_size_(page_size), used_memory_(0), ctx_(ctx) {} + : Allocator(kPooled), page_size_(page_size), used_memory_(0), ctx_(ctx) {} ~PooledAllocator() { ReleaseAll(); } diff --git a/src/runtime/vm/profiler/vm.cc b/src/runtime/vm/profiler/vm.cc index 6e4682d..7273b56 100644 --- a/src/runtime/vm/profiler/vm.cc +++ b/src/runtime/vm/profiler/vm.cc @@ -25,7 +25,6 @@ #include 
"vm.h" #include -#include #include #include diff --git a/src/runtime/vm/profiler/vm.h b/src/runtime/vm/profiler/vm.h index c286828..797d414 100644 --- a/src/runtime/vm/profiler/vm.h +++ b/src/runtime/vm/profiler/vm.h @@ -25,7 +25,7 @@ #ifndef TVM_RUNTIME_VM_PROFILER_VM_H_ #define TVM_RUNTIME_VM_PROFILER_VM_H_ -#include +#include #include #include diff --git a/src/runtime/vm/serialize_util.h b/src/runtime/vm/serialize_util.h index 8bd1f86..d52b73d 100644 --- a/src/runtime/vm/serialize_util.h +++ b/src/runtime/vm/serialize_util.h @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 24fc110..9af5202 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -19,32 +19,30 @@ /*! * \file src/runtime/vm/vm.cc - * \brief The Relay virtual machine. + * \brief The Relay virtual machine runtime. */ #include #include #include #include -#include +#include #include #include #include #include -#include #include #include -#include "memory_manager.h" -#include "naive_allocator.h" - using namespace tvm::runtime; namespace tvm { namespace runtime { namespace vm { +TVM_REGISTER_OBJECT_TYPE(VMClosureObj); + VMClosure::VMClosure(size_t func_index, std::vector free_vars) { auto ptr = make_object(); ptr->func_index = func_index; @@ -52,588 +50,6 @@ VMClosure::VMClosure(size_t func_index, std::vector free_vars) { data_ = std::move(ptr); } -inline Storage make_storage(size_t size, size_t alignment, DLDataType dtype_hint, TVMContext ctx) { - // We could put cache in here, from ctx to storage allocator. 
- auto storage_obj = SimpleObjAllocator().make_object(); - auto alloc = MemoryManager::Global()->GetAllocator(ctx); - DCHECK(alloc != nullptr) << "allocator must not null"; - storage_obj->buffer = alloc->Alloc(size, alignment, dtype_hint); - return Storage(storage_obj); -} - -Instruction::Instruction() {} - -template -static T* Duplicate(T* src, Index size) { - auto dst = new T[size]; - std::copy(src, src + size, dst); - return dst; -} - -Instruction::Instruction(const Instruction& instr) { - this->op = instr.op; - this->dst = instr.dst; - - switch (instr.op) { - case Opcode::Move: - this->from = instr.from; - return; - case Opcode::Fatal: - return; - case Opcode::Ret: - this->result = instr.result; - return; - case Opcode::AllocTensor: - this->alloc_tensor.storage = instr.alloc_tensor.storage; - this->alloc_tensor.offset = instr.alloc_tensor.offset; - this->alloc_tensor.ndim = instr.alloc_tensor.ndim; - this->alloc_tensor.shape = - Duplicate(instr.alloc_tensor.shape, instr.alloc_tensor.ndim); - this->alloc_tensor.dtype = instr.alloc_tensor.dtype; - return; - case Opcode::AllocTensorReg: - this->alloc_tensor_reg.storage = instr.alloc_tensor_reg.storage; - this->alloc_tensor_reg.offset = instr.alloc_tensor_reg.offset; - this->alloc_tensor_reg.shape_register = instr.alloc_tensor_reg.shape_register; - this->alloc_tensor_reg.dtype = instr.alloc_tensor_reg.dtype; - return; - case Opcode::AllocADT: - this->constructor_tag = instr.constructor_tag; - this->num_fields = instr.num_fields; - this->datatype_fields = Duplicate(instr.datatype_fields, instr.num_fields); - return; - case Opcode::AllocClosure: - this->clo_index = instr.clo_index; - this->num_freevar = instr.num_freevar; - this->free_vars = Duplicate(instr.free_vars, instr.num_freevar); - return; - case Opcode::InvokePacked: - this->packed_index = instr.packed_index; - this->arity = instr.arity; - this->output_size = instr.output_size; - this->packed_args = Duplicate(instr.packed_args, instr.arity); - return; - case 
Opcode::InvokeClosure: - this->closure = instr.closure; - this->num_closure_args = instr.num_closure_args; - this->closure_args = Duplicate(instr.closure_args, instr.num_closure_args); - return; - case Opcode::Invoke: - this->func_index = instr.func_index; - this->num_args = instr.num_args; - this->invoke_args_registers = Duplicate(instr.invoke_args_registers, instr.num_args); - return; - case Opcode::If: - this->if_op = instr.if_op; - return; - case Opcode::LoadConst: - this->const_index = instr.const_index; - return; - case Opcode::LoadConsti: - this->load_consti = instr.load_consti; - return; - case Opcode::GetField: - this->object = instr.object; - this->field_index = instr.field_index; - return; - case Opcode::GetTag: - this->get_tag = instr.get_tag; - return; - case Opcode::Goto: - this->pc_offset = instr.pc_offset; - return; - case Opcode::AllocStorage: - this->alloc_storage = instr.alloc_storage; - return; - case Opcode::ShapeOf: - this->shape_of.tensor = instr.shape_of.tensor; - return; - case Opcode::ReshapeTensor: - this->reshape_tensor.tensor = instr.reshape_tensor.tensor; - this->reshape_tensor.newshape = instr.reshape_tensor.newshape; - return; - default: - std::ostringstream out; - out << "Invalid instruction " << static_cast(instr.op); - throw std::runtime_error(out.str()); - } -} - -template -static inline void FreeIf(T* t) { - if (t != nullptr) { - delete t; - } -} - -Instruction& Instruction::operator=(const Instruction& instr) { - this->op = instr.op; - this->dst = instr.dst; - - switch (instr.op) { - case Opcode::Move: - this->from = instr.from; - return *this; - case Opcode::Fatal: - return *this; - case Opcode::LoadConsti: - this->load_consti = instr.load_consti; - return *this; - case Opcode::Ret: - this->result = instr.result; - return *this; - case Opcode::AllocTensor: - this->alloc_tensor.storage = this->alloc_tensor.storage; - this->alloc_tensor.offset = instr.alloc_tensor.offset; - this->alloc_tensor.ndim = instr.alloc_tensor.ndim; - 
this->alloc_tensor.shape = - Duplicate(instr.alloc_tensor.shape, instr.alloc_tensor.ndim); - this->alloc_tensor.dtype = instr.alloc_tensor.dtype; - return *this; - case Opcode::AllocTensorReg: - this->alloc_tensor_reg.storage = instr.alloc_tensor_reg.storage; - this->alloc_tensor_reg.offset = instr.alloc_tensor_reg.offset; - this->alloc_tensor_reg.shape_register = instr.alloc_tensor_reg.shape_register; - this->alloc_tensor_reg.dtype = instr.alloc_tensor_reg.dtype; - return *this; - case Opcode::AllocADT: - this->constructor_tag = instr.constructor_tag; - this->num_fields = instr.num_fields; - FreeIf(this->datatype_fields); - this->datatype_fields = Duplicate(instr.datatype_fields, instr.num_fields); - return *this; - case Opcode::AllocClosure: - this->clo_index = instr.clo_index; - this->num_freevar = instr.num_freevar; - FreeIf(this->free_vars); - this->free_vars = Duplicate(instr.free_vars, instr.num_freevar); - return *this; - case Opcode::InvokePacked: - this->packed_index = instr.packed_index; - this->arity = instr.arity; - this->output_size = instr.output_size; - FreeIf(this->packed_args); - this->packed_args = Duplicate(instr.packed_args, instr.arity); - return *this; - case Opcode::InvokeClosure: - this->closure = instr.closure; - this->num_closure_args = instr.num_closure_args; - FreeIf(this->closure_args); - this->closure_args = Duplicate(instr.closure_args, instr.num_closure_args); - return *this; - case Opcode::Invoke: - this->func_index = instr.func_index; - this->num_args = instr.num_args; - FreeIf(this->invoke_args_registers); - this->invoke_args_registers = Duplicate(instr.invoke_args_registers, instr.num_args); - return *this; - case Opcode::If: - this->if_op = instr.if_op; - return *this; - case Opcode::LoadConst: - this->const_index = instr.const_index; - return *this; - case Opcode::GetField: - this->object = instr.object; - this->field_index = instr.field_index; - return *this; - case Opcode::GetTag: - this->get_tag = instr.get_tag; - return 
*this; - case Opcode::Goto: - this->pc_offset = instr.pc_offset; - return *this; - case Opcode::AllocStorage: - this->alloc_storage = instr.alloc_storage; - return *this; - case Opcode::ShapeOf: - this->shape_of.tensor = instr.shape_of.tensor; - return *this; - default: - std::ostringstream out; - out << "Invalid instruction " << static_cast(instr.op); - throw std::runtime_error(out.str()); - } -} - -Instruction::~Instruction() { - switch (this->op) { - case Opcode::Move: - case Opcode::Ret: - case Opcode::AllocTensorReg: - case Opcode::If: - case Opcode::LoadConst: - case Opcode::GetField: - case Opcode::GetTag: - case Opcode::Goto: - case Opcode::LoadConsti: - case Opcode::AllocStorage: - case Opcode::ShapeOf: - case Opcode::ReshapeTensor: - case Opcode::Fatal: - return; - case Opcode::AllocTensor: - delete[] this->alloc_tensor.shape; - return; - case Opcode::AllocADT: - delete[] this->datatype_fields; - return; - case Opcode::AllocClosure: - delete[] this->free_vars; - return; - case Opcode::InvokePacked: - delete[] this->packed_args; - return; - case Opcode::InvokeClosure: - delete[] this->closure_args; - return; - case Opcode::Invoke: - delete[] this->invoke_args_registers; - return; - default: - std::ostringstream out; - LOG(FATAL) << "Invalid instruction " << static_cast(this->op); - } -} - -Instruction Instruction::Ret(RegName result) { - Instruction instr; - instr.op = Opcode::Ret; - instr.result = result; - return instr; -} - -Instruction Instruction::Fatal() { - Instruction instr; - instr.op = Opcode::Fatal; - return instr; -} - -Instruction Instruction::InvokePacked(Index packed_index, Index arity, Index output_size, - const std::vector& args) { - Instruction instr; - instr.op = Opcode::InvokePacked; - instr.packed_index = packed_index; - instr.arity = arity; - instr.output_size = output_size; - instr.packed_args = new RegName[arity]; - for (Index i = 0; i < arity; ++i) { - instr.packed_args[i] = args[i]; - } - return instr; -} - -Instruction 
Instruction::AllocTensor(RegName storage, RegName offset, - const std::vector& shape, DLDataType dtype, - RegName dst) { - Instruction instr; - instr.op = Opcode::AllocTensor; - instr.dst = dst; - instr.alloc_tensor.storage = storage; - instr.alloc_tensor.offset = offset; - instr.alloc_tensor.ndim = shape.size(); - instr.alloc_tensor.shape = new int64_t[shape.size()]; - for (size_t i = 0; i < shape.size(); ++i) { - instr.alloc_tensor.shape[i] = shape[i]; - } - instr.alloc_tensor.dtype = dtype; - return instr; -} - -Instruction Instruction::AllocTensorReg(RegName storage, RegName offset, RegName shape_register, - DLDataType dtype, RegName dst) { - Instruction instr; - instr.op = Opcode::AllocTensorReg; - instr.dst = dst; - instr.alloc_tensor_reg.storage = storage; - instr.alloc_tensor_reg.offset = offset; - instr.alloc_tensor_reg.shape_register = shape_register; - instr.alloc_tensor_reg.dtype = dtype; - return instr; -} - -Instruction Instruction::AllocStorage(RegName size, Index alignment, DLDataType dtype_hint, - RegName dst) { - Instruction instr; - instr.op = Opcode::AllocStorage; - instr.dst = dst; - instr.alloc_storage.allocation_size = size; - instr.alloc_storage.alignment = alignment; - instr.alloc_storage.dtype_hint = dtype_hint; - return instr; -} - -Instruction Instruction::ShapeOf(RegName tensor, RegName dst) { - Instruction instr; - instr.op = Opcode::ShapeOf; - instr.dst = dst; - instr.shape_of.tensor = tensor; - return instr; -} - -Instruction Instruction::ReshapeTensor(RegName tensor, RegName newshape, RegName dst) { - Instruction instr; - instr.op = Opcode::ReshapeTensor; - instr.dst = dst; - instr.reshape_tensor.tensor = tensor; - instr.reshape_tensor.newshape = newshape; - return instr; -} - -Instruction Instruction::AllocADT(Index tag, Index num_fields, - const std::vector& datatype_fields, RegName dst) { - Instruction instr; - instr.op = Opcode::AllocADT; - instr.dst = dst; - instr.constructor_tag = tag; - instr.num_fields = num_fields; - 
instr.datatype_fields = new RegName[num_fields]; - for (Index i = 0; i < num_fields; ++i) { - instr.datatype_fields[i] = datatype_fields[i]; - } - return instr; -} - -Instruction Instruction::AllocClosure(Index func_index, Index free_vars, - const std::vector& free_var_register, RegName dst) { - Instruction instr; - instr.op = Opcode::AllocClosure; - instr.dst = dst; - instr.clo_index = func_index; - instr.num_freevar = free_vars; - instr.free_vars = new RegName[instr.num_freevar]; - for (Index i = 0; i < instr.num_freevar; ++i) { - instr.free_vars[i] = free_var_register[i]; - } - return instr; -} - -Instruction Instruction::GetField(RegName object, Index field_index, RegName dst) { - Instruction instr; - instr.op = Opcode::GetField; - instr.dst = dst; - instr.object = object; - instr.field_index = field_index; - return instr; -} - -Instruction Instruction::GetTag(RegName object, RegName dst) { - Instruction instr; - instr.op = Opcode::GetTag; - instr.dst = dst; - instr.get_tag.object = object; - return instr; -} - -Instruction Instruction::If(RegName test, RegName target, Index true_branch, Index false_branch) { - Instruction instr; - instr.op = Opcode::If; - instr.if_op.test = test; - instr.if_op.target = target; - instr.if_op.true_offset = true_branch; - instr.if_op.false_offset = false_branch; - return instr; -} - -Instruction Instruction::Goto(Index pc_offset) { - Instruction instr; - instr.op = Opcode::Goto; - instr.pc_offset = pc_offset; - return instr; -} - -Instruction Instruction::Invoke(Index func_index, const std::vector& args_registers, - RegName dst) { - Instruction instr; - instr.op = Opcode::Invoke; - instr.dst = dst; - instr.func_index = func_index; - instr.num_args = args_registers.size(); - instr.invoke_args_registers = new RegName[instr.num_args]; - for (Index i = 0; i < instr.num_args; ++i) { - instr.invoke_args_registers[i] = args_registers[i]; - } - return instr; -} - -Instruction Instruction::InvokeClosure(RegName closure, const std::vector& 
args, - RegName dst) { - Instruction instr; - instr.op = Opcode::InvokeClosure; - instr.dst = dst; - instr.closure = closure; - instr.num_closure_args = args.size(); - instr.closure_args = new RegName[args.size()]; - for (size_t i = 0; i < args.size(); ++i) { - instr.closure_args[i] = args[i]; - } - return instr; -} - -Instruction Instruction::LoadConst(Index const_index, RegName dst) { - Instruction instr; - instr.op = Opcode::LoadConst; - instr.dst = dst; - instr.const_index = const_index; - return instr; -} - -Instruction Instruction::LoadConsti(Index val, RegName dst) { - Instruction instr; - instr.op = Opcode::LoadConsti; - instr.dst = dst; - instr.load_consti.val = val; - return instr; -} - -Instruction Instruction::Move(RegName src, RegName dst) { - Instruction instr; - instr.op = Opcode::Move; - instr.dst = dst; - instr.from = src; - return instr; -} - -void DLDatatypePrint(std::ostream& os, const DLDataType& dtype) { - switch (dtype.code) { - case kDLInt: - os << "int"; - break; - case kDLUInt: - os << "uint"; - break; - case kDLFloat: - os << "float"; - break; - } - - os << int(dtype.bits); - if (dtype.lanes != 1) { - os << "x" << dtype.lanes; - } -} - -template -std::string StrJoin(T* items, int offset, int cnt, std::string delim = ", ") { - if (cnt == 0) { - return ""; - } - std::ostringstream oss; - oss << items[offset]; - for (int i = 1; i < cnt; ++i) { - oss << delim << items[offset + i]; - } - return oss.str(); -} - -void InstructionPrint(std::ostream& os, const Instruction& instr) { - switch (instr.op) { - case Opcode::Move: { - os << "move $" << instr.dst << " $" << instr.from; - break; - } - case Opcode::Ret: { - os << "ret $" << instr.result; - break; - } - case Opcode::Fatal: { - os << "fatal"; - break; - } - case Opcode::InvokePacked: { - os << "invoke_packed PackedFunc[" << instr.packed_index << "] (in: $" - << StrJoin(instr.packed_args, 0, instr.arity - instr.output_size, ", $") - << ", out: $" - << StrJoin(instr.packed_args, instr.arity - 
instr.output_size, instr.output_size, - ", $") - << ")"; - break; - } - case Opcode::AllocTensor: { - os << "alloc_tensor $" << instr.dst << " $" << instr.alloc_tensor.storage << " $" - << instr.alloc_tensor.offset << " [" - << StrJoin(instr.alloc_tensor.shape, 0, instr.alloc_tensor.ndim) << "] "; - DLDatatypePrint(os, instr.alloc_tensor.dtype); - break; - } - case Opcode::AllocTensorReg: { - os << "alloc_tensor_reg $" << instr.dst << " $" << instr.alloc_tensor_reg.storage << " $" - << instr.alloc_tensor_reg.offset << " $" << instr.alloc_tensor_reg.shape_register << " "; - DLDatatypePrint(os, instr.alloc_tensor_reg.dtype); - break; - } - case Opcode::AllocADT: { - os << "alloc_data $" << instr.dst << " tag(" << instr.constructor_tag << ") [$" - << StrJoin(instr.datatype_fields, 0, instr.num_fields, ",$") << "]"; - break; - } - case Opcode::AllocClosure: { - os << "alloc_closure $" << instr.dst << " VMFunc[" << instr.clo_index << "]($" - << StrJoin(instr.free_vars, 0, instr.num_freevar, ",$") << ")"; - break; - } - case Opcode::If: { - os << "if " - << "$" << instr.if_op.test << " $" << instr.if_op.target << " " << instr.if_op.true_offset - << " " << instr.if_op.false_offset; - break; - } - case Opcode::Invoke: { - os << "invoke $" << instr.dst << " VMFunc[" << instr.func_index << "]($" - << StrJoin(instr.invoke_args_registers, 0, instr.num_args, ",$") << ")"; - break; - } - case Opcode::InvokeClosure: { - os << "invoke_closure $" << instr.dst << " $" << instr.closure << "($" - << StrJoin(instr.closure_args, 0, instr.num_closure_args, ",$") << ")"; - break; - } - case Opcode::LoadConst: { - os << "load_const $" << instr.dst << " Const[" << instr.const_index << "]"; - break; - } - case Opcode::LoadConsti: { - os << "load_consti $" << instr.dst << " " << instr.load_consti.val; - break; - } - case Opcode::GetField: { - os << "get_field $" << instr.dst << " $" << instr.object << "[" << instr.field_index << "]"; - break; - } - case Opcode::GetTag: { - os << "get_tag $" 
<< instr.dst << " $" << instr.get_tag.object; - break; - } - case Opcode::Goto: { - os << "goto " << instr.pc_offset; - break; - } - case Opcode::AllocStorage: { - os << "alloc_storage $" << instr.dst << " $" << instr.alloc_storage.allocation_size << " " - << instr.alloc_storage.alignment << " " - << DLDataType2String(instr.alloc_storage.dtype_hint); - break; - } - case Opcode::ShapeOf: { - os << "shape_of $" << instr.dst << " $" << instr.shape_of.tensor; - break; - } - case Opcode::ReshapeTensor: { - os << "reshape_tensor $" << instr.dst << " $" << instr.reshape_tensor.tensor << " $" - << instr.reshape_tensor.newshape; - break; - } - default: - LOG(FATAL) << "should never hit this case" << static_cast(instr.op); - break; - } -} - -std::ostream& operator<<(std::ostream& os, const Instruction& instr) { - InstructionPrint(os, instr); - return os; -} - void VMFunctionPrint(std::ostream& os, const VMFunction& vm_func) { os << vm_func.name << ": " << std::endl; for (size_t i = 0; i < vm_func.instructions.size(); ++i) { @@ -707,16 +123,19 @@ PackedFunc VirtualMachine::GetFunction(const std::string& name, }); } else if (name == "init") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - CHECK_EQ(args.size() % 2, 0); + CHECK_EQ(args.size() % 3, 0); std::vector contexts; - for (int i = 0; i < args.size() / 2; ++i) { + std::vector alloc_types; + for (int i = 0; i < args.size() / 3; ++i) { TVMContext ctx; - int device_type = args[i * 2]; + int device_type = args[i * 3]; ctx.device_type = DLDeviceType(device_type); - ctx.device_id = args[i * 2 + 1]; + ctx.device_id = args[i * 3 + 1]; + int type = args[i * 3 + 2]; contexts.push_back(ctx); + alloc_types.push_back(AllocatorType(type)); } - this->Init(contexts); + this->Init(contexts, alloc_types); }); } else if (name == "set_input") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { @@ -793,9 +212,6 @@ ObjectRef VirtualMachine::Invoke(const VMFunction& func, const 
std::vectorGetAllocator(ctxs_[0]); - DLOG(INFO) << "Memory used: " << alloc->UsedMemory() << " B"; return return_register_; } @@ -864,7 +280,15 @@ void VirtualMachine::LoadExecutable(const Executable* exec) { } } -void VirtualMachine::Init(const std::vector& ctxs) { ctxs_ = ctxs; } +void VirtualMachine::Init(const std::vector& ctxs, + const std::vector& alloc_types) { + CHECK_EQ(ctxs.size(), alloc_types.size()); + ctxs_ = ctxs; + for (size_t i = 0; i < ctxs.size(); ++i) { + auto alloc = MemoryManager::GetOrCreateAllocator(ctxs[i], alloc_types[i]); + allocators_.emplace(ctxs[i], alloc); + } +} inline void VirtualMachine::WriteRegister(Index r, const ObjectRef& val) { frames_.back().register_file[r] = val; @@ -1090,7 +514,13 @@ void VirtualMachine::RunLoop() { DLOG(INFO) << "AllocStorage: allocation_size=" << size << "alignment=" << alignment << "dtype_hint=" << DLDataType2String(instr.alloc_storage.dtype_hint); - auto storage = make_storage(size, alignment, instr.alloc_storage.dtype_hint, ctxs_[0]); + auto storage_obj = SimpleObjAllocator().make_object(); + auto it = allocators_.find(ctxs_[0]); + CHECK(it != allocators_.end()) + << "Did you forget to init the VirtualMachine with contexts?"; + auto alloc = it->second; + storage_obj->buffer = alloc->Alloc(size, alignment, instr.alloc_storage.dtype_hint); + Storage storage(storage_obj); WriteRegister(instr.dst, storage); pc_++; goto main_loop; diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index 3b9d4d4..5c6bd6f 100644 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -135,8 +135,7 @@ def run_tvm_graph(graph_def, input_data, input_node, num_output=1, elif mode == 'vm': with tvm.transform.PassContext(opt_level=opt_level, disabled_pass=disabled_pass): vm_exec = relay.vm.compile(mod, target="llvm", params=params) - vm = VirtualMachine(vm_exec) - vm.init(tvm.cpu()) + vm = VirtualMachine(vm_exec, 
tvm.cpu()) inputs = {} for e, i in zip(input_node, input_data): inputs[e] = tvm.nd.array(i) diff --git a/tests/python/relay/benchmarking/benchmark_vm.py b/tests/python/relay/benchmarking/benchmark_vm.py index a6e05be..80e9e41 100644 --- a/tests/python/relay/benchmarking/benchmark_vm.py +++ b/tests/python/relay/benchmarking/benchmark_vm.py @@ -61,8 +61,7 @@ def benchmark_execution(mod, number=2, repeat=20): with tvm.transform.PassContext(opt_level=3): exe = vm.compile(mod, target, params=params) - rly_vm = vm_rt.VirtualMachine(exe) - rly_vm.init(ctx) + rly_vm = vm_rt.VirtualMachine(exe, ctx) result = rly_vm.run(data) if measure: diff --git a/tests/python/relay/test_external_codegen.py b/tests/python/relay/test_external_codegen.py index 6771bd1..216d23e 100644 --- a/tests/python/relay/test_external_codegen.py +++ b/tests/python/relay/test_external_codegen.py @@ -55,8 +55,7 @@ def check_result(mod, map_inputs, out_shape, result, tol=1e-5, target="llvm", code, lib = exe.save() lib = update_lib(lib) exe = runtime.vm.Executable.load_exec(code, lib) - vm = runtime.vm.VirtualMachine(exe) - vm.init(ctx) + vm = runtime.vm.VirtualMachine(exe, ctx) out = vm.run(**map_inputs) tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) diff --git a/tests/python/relay/test_json_runtime.py b/tests/python/relay/test_json_runtime.py index a886692..cf3b2b2 100644 --- a/tests/python/relay/test_json_runtime.py +++ b/tests/python/relay/test_json_runtime.py @@ -71,8 +71,7 @@ def check_result(mod, exe = relay.vm.compile(mod, target=target, params=params) code, lib = exe.save() exe = runtime.vm.Executable.load_exec(code, lib) - vm = runtime.vm.VirtualMachine(exe) - vm.init(ctx) + vm = runtime.vm.VirtualMachine(exe, ctx) out = vm.run(**map_inputs) tvm.testing.assert_allclose(out.asnumpy(), ref_result, rtol=tol, atol=tol) diff --git a/tests/python/relay/test_pass_annotate_target.py b/tests/python/relay/test_pass_annotate_target.py index 273c27b..46989da 100644 --- 
a/tests/python/relay/test_pass_annotate_target.py +++ b/tests/python/relay/test_pass_annotate_target.py @@ -56,8 +56,7 @@ def check_result(mod, map_inputs, out_shape, result, tol=1e-5, target="llvm", code, lib = exe.save() lib = update_lib(lib) exe = runtime.vm.Executable.load_exec(code, lib) - vm = runtime.vm.VirtualMachine(exe) - vm.init(ctx) + vm = runtime.vm.VirtualMachine(exe, ctx) out = vm.run(**map_inputs) tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 84474f6..58bb16d 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -200,8 +200,7 @@ def check_result(mod, map_inputs, out_shape, result, tol=1e-5, target="llvm", code, lib = exe.save() lib = update_lib(lib) exe = runtime.vm.Executable.load_exec(code, lib) - vm = runtime.vm.VirtualMachine(exe) - vm.init(ctx) + vm = runtime.vm.VirtualMachine(exe, ctx) outs = vm.run(**map_inputs) outs = outs if isinstance(outs, runtime.container.ADT) else [outs] results = result if isinstance(result, list) else [result] diff --git a/tests/python/relay/test_vm.py b/tests/python/relay/test_vm.py index 91214cb..d3bb084 100644 --- a/tests/python/relay/test_vm.py +++ b/tests/python/relay/test_vm.py @@ -45,7 +45,6 @@ def check_result(args, expected_result, mod=None): if "cuda" in target: continue vm = relay.create_executor('vm', ctx=ctx, target=target, mod=mod) - rts_result = vm.evaluate()(*args) tvm.testing.assert_allclose(expected_result, rts_result.asnumpy()) @@ -57,8 +56,7 @@ def veval(f, *args, ctx=tvm.cpu(), target="llvm"): assert isinstance(f, tvm.IRModule), "expected expression or module" mod = f exe = relay.vm.compile(mod, target) - vm = runtime.vm.VirtualMachine(exe) - vm.init(ctx) + vm = runtime.vm.VirtualMachine(exe, ctx) return vm.invoke("main", *args) def vmobj_to_list(o): diff --git 
a/tests/python/relay/test_vm_serialization.py b/tests/python/relay/test_vm_serialization.py index 95e6c6f..d1bcdcc 100644 --- a/tests/python/relay/test_vm_serialization.py +++ b/tests/python/relay/test_vm_serialization.py @@ -45,8 +45,7 @@ def get_serialized_output(mod, *data, params=None, target="llvm", exe = create_exec(mod, target, params=params) code, lib = exe.save() des_exec = _vm.Executable.load_exec(code, lib) - des_vm = _vm.VirtualMachine(des_exec) - des_vm.init(ctx) + des_vm = _vm.VirtualMachine(des_exec, ctx) result = des_vm.run(*data) return result @@ -135,8 +134,7 @@ def test_save_load(): # deserialize. des_exec = _vm.Executable.load_exec(loaded_code, loaded_lib) - des_vm = _vm.VirtualMachine(des_exec) - des_vm.init(tvm.cpu()) + des_vm = _vm.VirtualMachine(des_exec, tvm.cpu()) res = des_vm.run(x_data) tvm.testing.assert_allclose(res.asnumpy(), x_data + x_data) diff --git a/tests/python/unittest/test_runtime_vm_profiler.py b/tests/python/unittest/test_runtime_vm_profiler.py index 064b733..97b54c6 100644 --- a/tests/python/unittest/test_runtime_vm_profiler.py +++ b/tests/python/unittest/test_runtime_vm_profiler.py @@ -29,8 +29,7 @@ def test_basic(): if not profiler_vm.enabled(): return exe = relay.vm.compile(mod, target, params=params) - vm = profiler_vm.VirtualMachineProfiler(exe) - vm.init(ctx) + vm = profiler_vm.VirtualMachineProfiler(exe, ctx) data = np.random.rand(1, 3, 224, 224).astype('float32') res = vm.invoke("main", [data])