verilator --version
```
-the supported version of Verilator should be at least 4.012,
+the supported version of Verilator should be at least 4.012,
if homebrew (OSX) or package-manager (Linux) does not support that version,
please install Verilator 4.012 or later from binary or source base on following
instruction of Verilator wiki.
1. Install `verilator` and `sbt` as described above
2. Get tvm `git clone https://github.com/dmlc/tvm.git`
-3. Change VTA target in `tvm/vta/config/vta_config.json` from `sim` to `tsim`
-4. Build [tvm](https://docs.tvm.ai/install/from_source.html#build-the-shared-library)
+3. Build [tvm](https://docs.tvm.ai/install/from_source.html#build-the-shared-library)
## How to run VTA TSIM examples
There are two sample VTA accelerators, add-a-constant, designed in Chisel3 and Verilog to show how *TSIM* works.
-The default `TARGET` language for these two implementations is Verilog. The following instructions show
+The default target language for these two implementations is Verilog. The following instructions show
how to run both of them:
* Test Verilog backend
from sys import platform
def get_ext():
+ """Return shared library extension"""
return ".dylib" if platform == "darwin" else ".so"
def load_dll(dll):
+ """Load shared library
+
+ Parameters
+ ------------
+ dll : str
+ Path for shared library
+
+ Returns
+ ------------
+ The shared library
+ """
try:
return [ctypes.CDLL(dll, ctypes.RTLD_GLOBAL)]
except OSError:
return []
def load_sw():
+ """Load all software shared libraries"""
cur_path = osp.dirname(osp.abspath(osp.expanduser(__file__)))
sw_libname = "libsw" + get_ext()
sw_lib = osp.join(cur_path, "..", "build", sw_libname)
def init(hw_backend):
"""Init hardware and software shared library for accelerator
- Parameters
- ------------
- hw_backend : str
+ Parameters
+ ------------
+ hw_backend : str
Hardware backend can be verilog or chisel
"""
hw_libname = "libhw" + get_ext()
if hw_backend in ("verilog", "chisel"):
hw_lib = osp.join(cur_path, "..", "hardware", hw_backend, "build", hw_libname)
- m = tvm.module.load(hw_lib, "vta-tsim")
load_sw()
+ m = tvm.module.load(hw_lib, "vta-tsim")
f = tvm.get_global_func("tvm.vta.tsim.init")
f(m)
def load_module():
+ """Return driver function"""
load_sw()
return tvm.get_global_func("tvm.vta.driver")
#include <tvm/runtime/registry.h>
#include <vta/dpi/module.h>
+#include "vmem/virtual_memory.h"
+
namespace vta {
namespace driver {
-uint32_t get_half_addr(void *p, bool upper) {
- if (upper) {
- return ((uint64_t) ((uint64_t*) p)) >> 32;
- } else {
- return ((uint64_t) ((uint64_t*) p));
- }
-}
-
using vta::dpi::DPIModuleNode;
using tvm::runtime::Module;
loader_ = DPILoader::Global();
}
- uint32_t Run(uint32_t c, uint32_t length, void* inp, void* out) {
+ uint32_t Run(uint32_t c, DLTensor* a, DLTensor* b) {
uint32_t cycles;
+ uint32_t len = a->shape[0];
+ size_t size = (a->dtype.bits >> 3) * len;
+ a_ = this->MemAlloc(size);
+ b_ = this->MemAlloc(size);
+ this->MemCopyFromHost(a_, a->data, size);
this->Init();
- this->Launch(c, length, inp, out);
+ this->Launch(c, len);
cycles = this->WaitForCompletion();
+ this->MemCopyToHost(b->data, b_, size);
+ this->MemFree(a_);
+ this->MemFree(b_);
return cycles;
}
dpi_->SimResume();
}
- void Launch(uint32_t c, uint32_t length, void* inp, void* out) {
+ void* MemAlloc(size_t size) {
+ void * addr = vta::vmem::VirtualMemoryManager::Global()->Alloc(size);
+ return reinterpret_cast<void*>(vta::vmem::VirtualMemoryManager::Global()->GetPhyAddr(addr));
+ }
+
+ void MemFree(void* buf) {
+ void * addr = vta::vmem::VirtualMemoryManager::Global()->GetAddr(reinterpret_cast<uint64_t>(buf));
+ vta::vmem::VirtualMemoryManager::Global()->Free(addr);
+ }
+
+ vta_phy_addr_t MemGetPhyAddr(void* buf) {
+ return reinterpret_cast<uint64_t>(reinterpret_cast<uint64_t*>(buf));
+ }
+
+ void MemCopyFromHost(void* dst, const void* src, size_t size) {
+ vta::vmem::VirtualMemoryManager::Global()->MemCopyFromHost(dst, src, size);
+ }
+
+ void MemCopyToHost(void* dst, const void* src, size_t size) {
+ vta::vmem::VirtualMemoryManager::Global()->MemCopyToHost(dst, src, size);
+ }
+
+ void Launch(uint32_t c, uint32_t len) {
dpi_->WriteReg(0x08, c);
- dpi_->WriteReg(0x0c, length);
- dpi_->WriteReg(0x10, get_half_addr(inp, false));
- dpi_->WriteReg(0x14, get_half_addr(inp, true));
- dpi_->WriteReg(0x18, get_half_addr(out, false));
- dpi_->WriteReg(0x1c, get_half_addr(out, true));
+ dpi_->WriteReg(0x0c, len);
+ dpi_->WriteReg(0x10, this->MemGetPhyAddr(a_));
+ dpi_->WriteReg(0x14, 0);
+ dpi_->WriteReg(0x18, this->MemGetPhyAddr(b_));
+ dpi_->WriteReg(0x1c, 0);
dpi_->WriteReg(0x00, 0x1); // launch
}
DPILoader* loader_{nullptr};
// DPI Module
DPIModuleNode* dpi_{nullptr};
+ // input vm ptr
+ void* a_{nullptr};
+ // output vm ptr
+ void* b_{nullptr};
};
using tvm::runtime::TVMRetValue;
TVM_REGISTER_GLOBAL("tvm.vta.driver")
.set_body([](TVMArgs args, TVMRetValue* rv) {
+ Device dev_;
DLTensor* A = args[0];
DLTensor* B = args[1];
- Device dev_;
- uint32_t cycles = dev_.Run(static_cast<int>(args[2]), A->shape[0], A->data, B->data);
+ uint32_t c = static_cast<int>(args[2]);
+ uint32_t cycles = dev_.Run(c, A, B);
*rv = static_cast<int>(cycles);
});