set(CMAKE_CXX_FLAGS "-faligned-new ${CMAKE_CXX_FLAGS}")
endif()
-# Module rules
-include(cmake/modules/hw.cmake)
-include(cmake/modules/sw.cmake)
+# Software driver library. The single source file is named explicitly: a glob
+# over one fixed path silently yields an empty list when the file is missing,
+# whereas an explicit path makes CMake report the missing source directly.
+set(TSIM_SW_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/driver.cc)
+add_library(sw SHARED ${TSIM_SW_SRC})
+target_include_directories(sw PRIVATE ${VTA_DIR}/include)
+
+if(APPLE)
+  # Let the macOS linker leave undefined symbols to be resolved at load time.
+  set_target_properties(sw PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+endif()
export PYTHONPATH:=$(PWD)/python:$(PYTHONPATH)
-BUILD_DIR = $(shell python3 config/config.py --get-build-name)
+BUILD_NAME = build
+build_dir = $(abspath .)/$(BUILD_NAME)
-default: cmake run
+default: verilog driver run_verilog
+run_chisel: chisel driver
+ python3 tests/python/chisel_accel.py
+
.PHONY: cmake
-cmake: | $(BUILD_DIR)
- cd $(BUILD_DIR) && cmake .. && make
+driver: | $(build_dir)
+ cd $(build_dir) && cmake .. && make
-$(BUILD_DIR):
+$(build_dir):
mkdir -p $@
-run:
- python3 tests/python/add_by_one.py | grep PASS
+# These targets name actions, not files; declaring them phony keeps a stray
+# file or directory of the same name from masking them.
+.PHONY: verilog chisel run_verilog clean
+
+# Build the Verilog backend. $(MAKE) is used for every recursive build so that
+# command-line flags and the jobserver are passed down to the sub-make.
+verilog:
+	$(MAKE) -C hardware/verilog
+
+# Build the Chisel backend.
+chisel:
+	$(MAKE) -C hardware/chisel
+
+# Run the Python test that drives the Verilog backend.
+run_verilog:
+	python3 tests/python/verilog_accel.py
clean:
- -rm -rf $(BUILD_DIR)
+	-rm -rf $(build_dir)
+	$(MAKE) -C hardware/chisel clean
+	$(MAKE) -C hardware/verilog clean
## Setup in TVM
1. Install `verilator` and `sbt` as described above
-2. Change `TARGET` to `tsim` in `<tvm-root>/tvm/vta/config/vta_config.json`
-3. Build [tvm](https://docs.tvm.ai/install/from_source.html#build-the-shared-library)
+2. Build [tvm](https://docs.tvm.ai/install/from_source.html#build-the-shared-library)
## How to run VTA TSIM examples
-There are two sample VTA accelerators (add-by-one) designed in Chisel3 and Verilog to show how *TSIM* works.
+There are two sample VTA accelerators (add-a-constant), one designed in Chisel3 and one in Verilog, that show how *TSIM* works.
The default `TARGET` language for these two implementations is Verilog. The following instructions show
how to run both of them:
-* Verilog add-by-one
+* Test Verilog backend
* Go to `<tvm-root>/vta/apps/tsim_example`
- * Run `make` to build and run add-by-one test
+ * Run `make`
-* Chisel3 add-by-one
- * Open `<tvm-root>/vta/apps/tsim_example/python/tsim/config.json`
- * Change `TARGET` from `verilog` to `chisel`
- * Go to `tvm/vta/apps/tsim_example`
- * Run `make` to build and run add-by-one test
+* Test Chisel3 backend
+ * Go to `<tvm-root>/vta/apps/tsim_example`
+ * Run `make run_chisel`
* Some pointers
- * Add-by-one test `<tvm-root>/vta/apps/tsim_example/tests/python/add_by_one.py`
- * Add-by-one accelerator in Verilog `<tvm-root>/vta/apps/tsim_example/hardware/verilog`
- * Add-by-one accelerator in Chisel3 `<tvm-root>/vta/apps/tsim_example/hardware/chisel`
- * Software driver that handles the accelerator `<tvm-root>/vta/apps/tsim_example/src/driver.cc`
- * Build cmake script for software library`<tvm-root>/vta/apps/tsim_example/cmake/modules/sw.cmake`
- * Build cmake script for hardware library`<tvm-root>/vta/apps/tsim_example/cmake/modules/hw.cmake`
+ * Verilog and Chisel3 tests in `<tvm-root>/vta/apps/tsim_example/tests/python`
+ * Verilog accelerator backend `<tvm-root>/vta/apps/tsim_example/hardware/verilog`
+ * Chisel3 accelerator backend `<tvm-root>/vta/apps/tsim_example/hardware/chisel`
+ * Software C++ driver (backend) that handles the accelerator `<tvm-root>/vta/apps/tsim_example/src/driver.cc`
+ * Software Python driver (frontend) that handles the accelerator `<tvm-root>/vta/apps/tsim_example/python/accel`
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-if(MSVC)
- message(STATUS "[TSIM_HW] build is skipped in Windows..")
-else()
- find_program(PYTHON NAMES python python3 python3.6)
- find_program(VERILATOR NAMES verilator)
-
- if (VERILATOR AND PYTHON)
-
- if (TSIM_TOP_NAME STREQUAL "")
- message(FATAL_ERROR "[TSIM_HW] TSIM_TOP_NAME should be defined")
- endif()
-
- if (TSIM_BUILD_NAME STREQUAL "")
- message(FATAL_ERROR "[TSIM_HW] TSIM_BUILD_NAME should be defined")
- endif()
-
- set(TSIM_CONFIG ${PYTHON} ${CMAKE_CURRENT_SOURCE_DIR}/config/config.py)
-
- execute_process(COMMAND ${TSIM_CONFIG} --get-target OUTPUT_VARIABLE TSIM_TARGET OUTPUT_STRIP_TRAILING_WHITESPACE)
- execute_process(COMMAND ${TSIM_CONFIG} --get-top-name OUTPUT_VARIABLE TSIM_TOP_NAME OUTPUT_STRIP_TRAILING_WHITESPACE)
- execute_process(COMMAND ${TSIM_CONFIG} --get-build-name OUTPUT_VARIABLE TSIM_BUILD_NAME OUTPUT_STRIP_TRAILING_WHITESPACE)
- execute_process(COMMAND ${TSIM_CONFIG} --get-use-trace OUTPUT_VARIABLE TSIM_USE_TRACE OUTPUT_STRIP_TRAILING_WHITESPACE)
- execute_process(COMMAND ${TSIM_CONFIG} --get-trace-name OUTPUT_VARIABLE TSIM_TRACE_NAME OUTPUT_STRIP_TRAILING_WHITESPACE)
-
- set(TSIM_BUILD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/${TSIM_BUILD_NAME})
-
- if (TSIM_TARGET STREQUAL "chisel")
-
- find_program(SBT NAMES sbt)
-
- if (SBT)
-
- # Install Chisel VTA package for DPI modules
- set(VTA_CHISEL_DIR ${VTA_DIR}/hardware/chisel)
-
- execute_process(WORKING_DIRECTORY ${VTA_CHISEL_DIR}
- COMMAND ${SBT} publishLocal RESULT_VARIABLE RETCODE)
-
- if (NOT RETCODE STREQUAL "0")
- message(FATAL_ERROR "[TSIM_HW] sbt failed to install VTA scala package")
- endif()
-
- # Chisel - Scala to Verilog compilation
- set(TSIM_CHISEL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/hardware/chisel)
- set(CHISEL_BUILD_DIR ${TSIM_BUILD_DIR}/chisel)
- set(CHISEL_OPT "test:runMain test.Elaborate --target-dir ${CHISEL_BUILD_DIR} --top-name ${TSIM_TOP_NAME}")
-
- execute_process(WORKING_DIRECTORY ${TSIM_CHISEL_DIR} COMMAND ${SBT} ${CHISEL_OPT} RESULT_VARIABLE RETCODE)
-
- if (NOT RETCODE STREQUAL "0")
- message(FATAL_ERROR "[TSIM_HW] sbt failed to compile from Chisel to Verilog.")
- endif()
-
- file(GLOB VERILATOR_RTL_SRC ${CHISEL_BUILD_DIR}/*.v)
-
- else()
- message(FATAL_ERROR "[TSIM_HW] sbt should be installed for Chisel")
- endif() # sbt
-
- elseif (TSIM_TARGET STREQUAL "verilog")
-
- set(VTA_VERILOG_DIR ${VTA_DIR}/hardware/chisel/src/main/resources/verilog)
- set(TSIM_VERILOG_DIR ${CMAKE_CURRENT_SOURCE_DIR}/hardware/verilog)
- file(GLOB VERILATOR_RTL_SRC ${VTA_VERILOG_DIR}/*.v ${TSIM_VERILOG_DIR}/*.v)
-
- else()
- message(FATAL_ERROR "[TSIM_HW] target language can be only verilog or chisel...")
- endif() # TSIM_TARGET
-
- if (TSIM_TARGET STREQUAL "chisel" OR TSIM_TARGET STREQUAL "verilog")
-
- # Check if tracing can be enabled
- if (NOT TSIM_USE_TRACE STREQUAL "off")
- message(STATUS "[TSIM_HW] Verilog enable tracing")
- else()
- message(STATUS "[TSIM_HW] Verilator disable tracing")
- endif()
-
- # Verilator - Verilog to C++ compilation
- set(VERILATOR_BUILD_DIR ${TSIM_BUILD_DIR}/verilator)
- set(VERILATOR_OPT +define+RANDOMIZE_GARBAGE_ASSIGN +define+RANDOMIZE_REG_INIT)
- list(APPEND VERILATOR_OPT +define+RANDOMIZE_MEM_INIT --x-assign unique)
- list(APPEND VERILATOR_OPT --output-split 20000 --output-split-cfuncs 20000)
- list(APPEND VERILATOR_OPT --top-module ${TSIM_TOP_NAME} -Mdir ${VERILATOR_BUILD_DIR})
- list(APPEND VERILATOR_OPT --cc ${VERILATOR_RTL_SRC})
-
- if (NOT TSIM_USE_TRACE STREQUAL "off")
- list(APPEND VERILATOR_OPT --trace)
- endif()
-
- execute_process(COMMAND ${VERILATOR} ${VERILATOR_OPT} RESULT_VARIABLE RETCODE)
-
- if (NOT RETCODE STREQUAL "0")
- message(FATAL_ERROR "[TSIM_HW] Verilator failed to compile Verilog to C++...")
- endif()
-
- # Build shared library (.so)
- set(VTA_HW_DPI_DIR ${VTA_DIR}/hardware/dpi)
- if (EXISTS /usr/local/share/verilator/include)
- set(VERILATOR_INC_DIR /usr/local/share/verilator/include)
- elseif (EXISTS /usr/share/verilator/include)
- set(VERILATOR_INC_DIR /usr/share/verilator/include)
- else()
- message(FATAL_ERROR "[TSIM_HW] Verilator include directory not found")
- endif()
- set(VERILATOR_LIB_SRC ${VERILATOR_INC_DIR}/verilated.cpp ${VERILATOR_INC_DIR}/verilated_dpi.cpp)
-
- if (NOT TSIM_USE_TRACE STREQUAL "off")
- list(APPEND VERILATOR_LIB_SRC ${VERILATOR_INC_DIR}/verilated_vcd_c.cpp)
- endif()
-
- file(GLOB VERILATOR_GEN_SRC ${VERILATOR_BUILD_DIR}/*.cpp)
- file(GLOB VERILATOR_SRC ${VTA_HW_DPI_DIR}/tsim_device.cc)
- add_library(hw SHARED ${VERILATOR_LIB_SRC} ${VERILATOR_GEN_SRC} ${VERILATOR_SRC})
-
- set(VERILATOR_DEF VL_USER_FINISH VL_TSIM_NAME=V${TSIM_TOP_NAME} VL_PRINTF=printf VM_COVERAGE=0 VM_SC=0)
- if (NOT TSIM_USE_TRACE STREQUAL "off")
- list(APPEND VERILATOR_DEF VM_TRACE=1 TSIM_TRACE_FILE=${TSIM_BUILD_DIR}/${TSIM_TRACE_NAME}.vcd)
- else()
- list(APPEND VERILATOR_DEF VM_TRACE=0)
- endif()
- target_compile_definitions(hw PRIVATE ${VERILATOR_DEF})
- target_compile_options(hw PRIVATE -Wno-sign-compare -include V${TSIM_TOP_NAME}.h)
- target_include_directories(hw PRIVATE ${VERILATOR_BUILD_DIR} ${VERILATOR_INC_DIR} ${VERILATOR_INC_DIR}/vltstd ${VTA_DIR}/include)
-
- if(APPLE)
- set_target_properties(hw PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
- endif(APPLE)
-
- endif() # TSIM_TARGET STREQUAL "chisel" OR TSIM_TARGET STREQUAL "verilog"
-
- else()
- message(STATUS "[TSIM_HW] could not find Python or Verilator, build is skipped...")
- endif() # VERILATOR
-endif() # MSVC
+++ /dev/null
-{
- "TARGET" : "verilog",
- "TOP_NAME" : "TestAccel",
- "BUILD_NAME" : "build",
- "USE_TRACE" : "off",
- "TRACE_NAME" : "trace"
-}
+++ /dev/null
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os.path as osp
-import sys
-import json
-import argparse
-
-cur = osp.abspath(osp.dirname(__file__))
-cfg = json.load(open(osp.join(cur, 'config.json')))
-
-def main():
- """Main function"""
- parser = argparse.ArgumentParser()
- parser.add_argument("--get-target", action="store_true",
- help="Get target language, i.e. verilog or chisel")
- parser.add_argument("--get-top-name", action="store_true",
- help="Get hardware design top name")
- parser.add_argument("--get-build-name", action="store_true",
- help="Get build folder name")
- parser.add_argument("--get-use-trace", action="store_true",
- help="Get use trace")
- parser.add_argument("--get-trace-name", action="store_true",
- help="Get trace filename")
- args = parser.parse_args()
-
- if len(sys.argv) == 1:
- parser.print_help()
- return
-
- if args.get_target:
- print(cfg['TARGET'])
-
- if args.get_top_name:
- print(cfg['TOP_NAME'])
-
- if args.get_build_name:
- print(cfg['BUILD_NAME'])
-
- if args.get_use_trace:
- print(cfg['USE_TRACE'])
-
- if args.get_trace_name:
- print(cfg['TRACE_NAME'])
-
-if __name__ == "__main__":
- main()
# specific language governing permissions and limitations
# under the License.
+ifeq (, $(shell which verilator))
+ $(error "No Verilator in $(PATH), consider doing apt-get install verilator")
+endif
+
+# Change VERILATOR_INC_DIR if Verilator is installed on a different location
+ifeq (, $(VERILATOR_INC_DIR))
+ ifeq (, $(wildcard /usr/local/share/verilator/include/*))
+ ifeq (, $(wildcard /usr/share/verilator/include/*))
+ $(error "Verilator include directory is not set properly")
+ else
+ VERILATOR_INC_DIR := /usr/share/verilator/include
+ endif
+ else
+ VERILATOR_INC_DIR := /usr/local/share/verilator/include
+ endif
+endif
+
+TOP = TestAccel
+BUILD_NAME = build
+USE_TRACE = 0
+LIBNAME = libhw
+
+vta_dir = $(abspath ../../../../)
+tvm_dir = $(abspath ../../../../../)
+build_dir = $(abspath .)/$(BUILD_NAME)
+verilator_build_dir = $(build_dir)/verilator
+chisel_build_dir = $(build_dir)/chisel
+
+verilator_opt = --cc
+verilator_opt += +define+RANDOMIZE_GARBAGE_ASSIGN
+verilator_opt += +define+RANDOMIZE_REG_INIT
+verilator_opt += +define+RANDOMIZE_MEM_INIT
+verilator_opt += --x-assign unique
+verilator_opt += --output-split 20000
+verilator_opt += --output-split-cfuncs 20000
+verilator_opt += --top-module ${TOP}
+verilator_opt += -Mdir ${verilator_build_dir}
+verilator_opt += -I$(chisel_build_dir)
+
+cxx_flags = -O2 -Wall -fPIC -shared
+cxx_flags += -fvisibility=hidden -std=c++11
+cxx_flags += -DVL_TSIM_NAME=V$(TOP)
+cxx_flags += -DVL_PRINTF=printf
+cxx_flags += -DVL_USER_FINISH
+cxx_flags += -DVM_COVERAGE=0
+cxx_flags += -DVM_SC=0
+cxx_flags += -Wno-sign-compare
+cxx_flags += -include V$(TOP).h
+cxx_flags += -I$(verilator_build_dir)
+cxx_flags += -I$(VERILATOR_INC_DIR)
+cxx_flags += -I$(VERILATOR_INC_DIR)/vltstd
+cxx_flags += -I$(vta_dir)/include
+cxx_flags += -I$(tvm_dir)/include
+cxx_flags += -I$(tvm_dir)/3rdparty/dlpack/include
+
+cxx_files = $(VERILATOR_INC_DIR)/verilated.cpp
+cxx_files += $(VERILATOR_INC_DIR)/verilated_dpi.cpp
+cxx_files += $(wildcard $(verilator_build_dir)/*.cpp)
+cxx_files += $(vta_dir)/hardware/dpi/tsim_device.cc
+
+ifneq ($(USE_TRACE), 0)
+ verilator_opt += --trace
+ cxx_flags += -DVM_TRACE=1
+ cxx_flags += -DTSIM_TRACE_FILE=$(verilator_build_dir)/$(TOP).vcd
+ cxx_files += $(VERILATOR_INC_DIR)/verilated_vcd_c.cpp
+else
+ cxx_flags += -DVM_TRACE=0
+endif
+
+# These targets name actions, not files; declaring them phony keeps a stray
+# file or directory of the same name from masking them.
+.PHONY: default lib verilator verilog install_vta_package clean
+
+default: lib
+
+# Compile the Verilator-generated C++, the Verilator runtime sources and the
+# DPI device source into the shared library. $(CXX) defaults to g++ and can be
+# overridden from the command line or the environment.
+lib: $(build_dir)/$(LIBNAME).so
+$(build_dir)/$(LIBNAME).so: $(verilator_build_dir)/V$(TOP).cpp
+	$(CXX) $(cxx_flags) $(cxx_files) -o $@
+
+# Translate the elaborated Verilog into C++ with Verilator.
+verilator: $(verilator_build_dir)/V$(TOP).cpp
+$(verilator_build_dir)/V$(TOP).cpp: $(chisel_build_dir)/$(TOP).v
+	verilator $(verilator_opt) $<
+
+# Elaborate the Chisel design into Verilog. The prerequisite is never a real
+# file, so this rule is re-run on every build.
+verilog: $(chisel_build_dir)/$(TOP).v
+$(chisel_build_dir)/$(TOP).v: install_vta_package
+	sbt 'test:runMain test.Elaborate --target-dir $(chisel_build_dir) --top-name $(TOP)'
+
+# Publish the VTA Chisel package to the local repository.
+install_vta_package:
+	cd $(vta_dir)/hardware/chisel && sbt publishLocal
+
clean:
- -rm -rf target project/target project/project
+ -rm -rf $(build_dir) target project/target project/project
* |_________| |_________|
*
*/
+// Sizing parameters shared by RegFile and Compute. RegFile lays its registers
+// out in this order: control, event counters, values, pointers.
+case class AccelConfig() {
+ // number of control/status registers (RegFile places it first, at 0x00)
+ val nCtrl = 1
+ // number of event-counter registers written by the hardware
+ val nECnt = 1
+ // number of scalar value registers (Compute reads vals(0) and vals(1))
+ val nVals = 2
+ // number of pointers; RegFile reserves two registers for each one
+ val nPtrs = 2
+ // width in bits of event-counter and value registers
+ val regBits = 32
+ // width in bits of a pointer, i.e. two registers concatenated
+ val ptrBits = 2*regBits
+}
+
class Accel extends Module {
val io = IO(new Bundle {
val host = new VTAHostDPIClient
val mem = new VTAMemDPIMaster
})
+ implicit val config = AccelConfig()
val rf = Module(new RegFile)
val ce = Module(new Compute)
rf.io.host <> io.host
io.mem <> ce.io.mem
ce.io.launch := rf.io.launch
rf.io.finish := ce.io.finish
- ce.io.length := rf.io.length
- ce.io.inp_baddr := rf.io.inp_baddr
- ce.io.out_baddr := rf.io.out_baddr
+ rf.io.ecnt <> ce.io.ecnt
+ ce.io.vals <> rf.io.vals
+ ce.io.ptrs <> rf.io.ptrs
}
* 6. Check if counter (cnt) is equal to length to assert finish,
* otherwise go to step 2.
*/
-class Compute extends Module {
+class Compute(implicit config: AccelConfig) extends Module {
val io = IO(new Bundle {
val launch = Input(Bool())
val finish = Output(Bool())
- val length = Input(UInt(32.W))
- val inp_baddr = Input(UInt(64.W))
- val out_baddr = Input(UInt(64.W))
+ val ecnt = Vec(config.nECnt, ValidIO(UInt(config.regBits.W)))
+ val vals = Input(Vec(config.nVals, UInt(config.regBits.W)))
+ val ptrs = Input(Vec(config.nPtrs, UInt(config.ptrBits.W)))
val mem = new VTAMemDPIMaster
})
val sIdle :: sReadReq :: sReadData :: sWriteReq :: sWriteData :: Nil = Enum(5)
val state = RegInit(sIdle)
+ val const = io.vals(0)
+ val length = io.vals(1)
+ val cycles = RegInit(0.U(config.regBits.W))
val reg = Reg(chiselTypeOf(io.mem.rd.bits))
- val cnt = Reg(chiselTypeOf(io.length))
- val raddr = Reg(chiselTypeOf(io.inp_baddr))
- val waddr = Reg(chiselTypeOf(io.out_baddr))
+ val cnt = Reg(UInt(config.regBits.W))
+ val raddr = Reg(UInt(config.ptrBits.W))
+ val waddr = Reg(UInt(config.ptrBits.W))
switch (state) {
is (sIdle) {
state := sWriteData
}
is (sWriteData) {
- when (cnt === (io.length - 1.U)) {
+ when (cnt === (length - 1.U)) {
state := sIdle
} .otherwise {
state := sReadReq
}
}
+ val last = state === sWriteData && cnt === (length - 1.U)
+
+ // cycle counter
+ when (state === sIdle) {
+ cycles := 0.U
+ } .otherwise {
+ cycles := cycles + 1.U
+ }
+
+ io.ecnt(0).valid := last
+ io.ecnt(0).bits := cycles
+
// calculate next address
when (state === sIdle) {
- raddr := io.inp_baddr
- waddr := io.out_baddr
+ raddr := io.ptrs(0)
+ waddr := io.ptrs(1)
} .elsewhen (state === sWriteData) { // increment by 8-bytes
raddr := raddr + 8.U
waddr := waddr + 8.U
// read
when (state === sReadData && io.mem.rd.valid) {
- reg := io.mem.rd.bits + 1.U
+ reg := io.mem.rd.bits + const
}
io.mem.rd.ready := state === sReadData
}
// done when read/write are equal to length
- io.finish := state === sWriteData && cnt === (io.length - 1.U)
+ io.finish := last
}
* Register description | addr
* -------------------------|-----
* Control status register | 0x00
- * Length value register | 0x04
- * Input pointer lsb | 0x08
- * Input pointer msb | 0x0c
- * Output pointer lsb | 0x10
- * Output pointer msb | 0x14
+ * Cycle counter | 0x04
+ * Constant value | 0x08
+ * Vector length | 0x0c
+ * Input pointer lsb | 0x10
+ * Input pointer msb | 0x14
+ * Output pointer lsb | 0x18
+ * Output pointer msb | 0x1c
* -------------------------------
* ------------------------------
* Finish | 1
* ------------------------------
*/
-class RegFile extends Module {
+class RegFile(implicit config: AccelConfig) extends Module {
val io = IO(new Bundle {
val launch = Output(Bool())
val finish = Input(Bool())
- val length = Output(UInt(32.W))
- val inp_baddr = Output(UInt(64.W))
- val out_baddr = Output(UInt(64.W))
+ // event counters reported by the compute unit; latched while valid is high
+ val ecnt = Vec(config.nECnt, Flipped(ValidIO(UInt(config.regBits.W))))
+ // scalar values written by the host, one register each
+ val vals = Output(Vec(config.nVals, UInt(config.regBits.W)))
+ // each pointer is the concatenation of two host registers (msb, lsb), so it
+ // must be ptrBits wide; a regBits-wide port would drop the upper half
+ val ptrs = Output(Vec(config.nPtrs, UInt(config.ptrBits.W)))
val host = new VTAHostDPIClient
})
val sIdle :: sRead :: Nil = Enum(2)
io.host.req.deq := state === sIdle & io.host.req.valid
- val reg = Seq.fill(6)(RegInit(0.U.asTypeOf(chiselTypeOf(io.host.req.value))))
- val addr = Seq.tabulate(6)(_ * 4)
+ val nTotal = config.nCtrl + config.nECnt + config.nVals + (2*config.nPtrs)
+ val reg = Seq.fill(nTotal)(RegInit(0.U.asTypeOf(chiselTypeOf(io.host.req.value))))
+ val addr = Seq.tabulate(nTotal)(_ * 4)
val reg_map = (addr zip reg) map { case (a, r) => a.U -> r }
+ val eo = config.nCtrl
+ val vo = eo + config.nECnt
+ val po = vo + config.nVals
- (reg zip addr).foreach { case(r, a) =>
- if (a == 0) { // control status register
- when (io.finish) {
- r := "b_10".U
- } .elsewhen (state === sIdle && io.host.req.valid &&
- io.host.req.opcode && a.U === io.host.req.addr) {
- r := io.host.req.value
- }
- } else {
- when (state === sIdle && io.host.req.valid &&
- io.host.req.opcode && a.U === io.host.req.addr) {
- r := io.host.req.value
- }
+ when (io.finish) {
+ reg(0) := "b_10".U
+ } .elsewhen (state === sIdle && io.host.req.valid &&
+ io.host.req.opcode && addr(0).U === io.host.req.addr) {
+ reg(0) := io.host.req.value
+ }
+
+ for (i <- 0 until config.nECnt) {
+ when (io.ecnt(i).valid) {
+ reg(eo + i) := io.ecnt(i).bits
+ } .elsewhen (state === sIdle && io.host.req.valid &&
+ io.host.req.opcode && addr(eo + i).U === io.host.req.addr) {
+ reg(eo + i) := io.host.req.value
+ }
+ }
+
+ for (i <- 0 until (config.nVals + (2*config.nPtrs))) {
+ when (state === sIdle && io.host.req.valid &&
+ io.host.req.opcode && addr(vo + i).U === io.host.req.addr) {
+ reg(vo + i) := io.host.req.value
}
}
io.host.resp.bits := rdata
io.launch := reg(0)(0)
- io.length := reg(1)
- io.inp_baddr := Cat(reg(3), reg(2))
- io.out_baddr := Cat(reg(5), reg(4))
+
+ for (i <- 0 until config.nVals) {
+ io.vals(i) := reg(vo + i)
+ }
+
+ for (i <- 0 until config.nPtrs) {
+ io.ptrs(i) := Cat(reg(po + 2*i + 1), reg(po + 2*i))
+ }
}
--- /dev/null
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ifeq (, $(shell which verilator))
+ $(error "No Verilator in $(PATH), consider doing apt-get install verilator")
+endif
+
+# Change VERILATOR_INC_DIR if Verilator is installed on a different location
+ifeq (, $(VERILATOR_INC_DIR))
+ ifeq (, $(wildcard /usr/local/share/verilator/include/*))
+ ifeq (, $(wildcard /usr/share/verilator/include/*))
+ $(error "Verilator include directory is not set properly")
+ else
+ VERILATOR_INC_DIR := /usr/share/verilator/include
+ endif
+ else
+ VERILATOR_INC_DIR := /usr/local/share/verilator/include
+ endif
+endif
+
+TOP = TestAccel
+BUILD_NAME = build
+USE_TRACE = 0
+LIBNAME = libhw
+
+vta_dir = $(abspath ../../../../)
+tvm_dir = $(abspath ../../../../../)
+build_dir = $(abspath .)/$(BUILD_NAME)
+
+verilator_opt = --cc
+verilator_opt += +define+RANDOMIZE_GARBAGE_ASSIGN
+verilator_opt += +define+RANDOMIZE_REG_INIT
+verilator_opt += +define+RANDOMIZE_MEM_INIT
+verilator_opt += --x-assign unique
+verilator_opt += --output-split 20000
+verilator_opt += --output-split-cfuncs 20000
+verilator_opt += --top-module ${TOP}
+verilator_opt += -Mdir ${build_dir}
+
+cxx_flags = -O2 -Wall -fPIC -shared
+cxx_flags += -fvisibility=hidden -std=c++11
+cxx_flags += -DVL_TSIM_NAME=V$(TOP)
+cxx_flags += -DVL_PRINTF=printf
+cxx_flags += -DVL_USER_FINISH
+cxx_flags += -DVM_COVERAGE=0
+cxx_flags += -DVM_SC=0
+cxx_flags += -Wno-sign-compare
+cxx_flags += -include V$(TOP).h
+cxx_flags += -I$(build_dir)
+cxx_flags += -I$(VERILATOR_INC_DIR)
+cxx_flags += -I$(VERILATOR_INC_DIR)/vltstd
+cxx_flags += -I$(vta_dir)/include
+cxx_flags += -I$(tvm_dir)/include
+cxx_flags += -I$(tvm_dir)/3rdparty/dlpack/include
+
+cxx_files = $(VERILATOR_INC_DIR)/verilated.cpp
+cxx_files += $(VERILATOR_INC_DIR)/verilated_dpi.cpp
+cxx_files += $(wildcard $(build_dir)/*.cpp)
+cxx_files += $(vta_dir)/hardware/dpi/tsim_device.cc
+
+v_files = $(wildcard $(abspath .)/src/*.v $(vta_dir)/hardware/chisel/src/main/resources/verilog/*.v)
+
+ifneq ($(USE_TRACE), 0)
+ verilator_opt += --trace
+ cxx_flags += -DVM_TRACE=1
+ cxx_flags += -DTSIM_TRACE_FILE=$(build_dir)/$(TOP).vcd
+ cxx_files += $(VERILATOR_INC_DIR)/verilated_vcd_c.cpp
+else
+ cxx_flags += -DVM_TRACE=0
+endif
+
+# These targets name actions, not files; declaring them phony keeps a stray
+# file or directory of the same name from masking them.
+.PHONY: default lib verilator clean
+
+default: lib
+
+# Compile the Verilator-generated C++, the Verilator runtime sources and the
+# DPI device source into the shared library. $(CXX) defaults to g++ and can be
+# overridden from the command line or the environment.
+lib: $(build_dir)/$(LIBNAME).so
+$(build_dir)/$(LIBNAME).so: $(build_dir)/V$(TOP).cpp
+	$(CXX) $(cxx_flags) $(cxx_files) -o $@
+
+# Translate the Verilog sources into C++ with Verilator. The build directory
+# is an order-only prerequisite, so its timestamp never forces a rebuild.
+verilator: $(build_dir)/V$(TOP).cpp
+$(build_dir)/V$(TOP).cpp: $(v_files) | $(build_dir)
+	verilator $(verilator_opt) $(v_files)
+
+$(build_dir):
+	mkdir -p $@
+
+clean:
+	-rm -rf $(build_dir)
logic launch;
logic finish;
+
+ logic event_counter_valid;
+ logic [HOST_DATA_BITS-1:0] event_counter_value;
+
+ logic [HOST_DATA_BITS-1:0] constant;
logic [HOST_DATA_BITS-1:0] length;
logic [MEM_ADDR_BITS-1:0] inp_baddr;
logic [MEM_ADDR_BITS-1:0] out_baddr;
)
rf
(
- .clock (clock),
- .reset (reset),
-
- .host_req_valid (host_req_valid),
- .host_req_opcode (host_req_opcode),
- .host_req_addr (host_req_addr),
- .host_req_value (host_req_value),
- .host_req_deq (host_req_deq),
- .host_resp_valid (host_resp_valid),
- .host_resp_bits (host_resp_bits),
-
- .launch (launch),
- .finish (finish),
- .length (length),
- .inp_baddr (inp_baddr),
- .out_baddr (out_baddr)
+ .clock (clock),
+ .reset (reset),
+
+ .host_req_valid (host_req_valid),
+ .host_req_opcode (host_req_opcode),
+ .host_req_addr (host_req_addr),
+ .host_req_value (host_req_value),
+ .host_req_deq (host_req_deq),
+ .host_resp_valid (host_resp_valid),
+ .host_resp_bits (host_resp_bits),
+
+ .launch (launch),
+ .finish (finish),
+
+ .event_counter_valid (event_counter_valid),
+ .event_counter_value (event_counter_value),
+
+ .constant (constant),
+ .length (length),
+ .inp_baddr (inp_baddr),
+ .out_baddr (out_baddr)
);
Compute #
)
comp
(
- .clock (clock),
- .reset (reset),
-
- .mem_req_valid (mem_req_valid),
- .mem_req_opcode (mem_req_opcode),
- .mem_req_len (mem_req_len),
- .mem_req_addr (mem_req_addr),
- .mem_wr_valid (mem_wr_valid),
- .mem_wr_bits (mem_wr_bits),
- .mem_rd_valid (mem_rd_valid),
- .mem_rd_bits (mem_rd_bits),
- .mem_rd_ready (mem_rd_ready),
-
- .launch (launch),
- .finish (finish),
- .length (length),
- .inp_baddr (inp_baddr),
- .out_baddr (out_baddr)
+ .clock (clock),
+ .reset (reset),
+
+ .mem_req_valid (mem_req_valid),
+ .mem_req_opcode (mem_req_opcode),
+ .mem_req_len (mem_req_len),
+ .mem_req_addr (mem_req_addr),
+ .mem_wr_valid (mem_wr_valid),
+ .mem_wr_bits (mem_wr_bits),
+ .mem_rd_valid (mem_rd_valid),
+ .mem_rd_bits (mem_rd_bits),
+ .mem_rd_ready (mem_rd_ready),
+
+ .launch (launch),
+ .finish (finish),
+
+ .event_counter_valid (event_counter_valid),
+ .event_counter_value (event_counter_value),
+
+ .constant (constant),
+ .length (length),
+ .inp_baddr (inp_baddr),
+ .out_baddr (out_baddr)
);
endmodule
input launch,
output finish,
+
+ output event_counter_valid,
+ output [HOST_DATA_BITS-1:0] event_counter_value,
+
+ input [HOST_DATA_BITS-1:0] constant,
input [HOST_DATA_BITS-1:0] length,
input [MEM_ADDR_BITS-1:0] inp_baddr,
input [MEM_ADDR_BITS-1:0] out_baddr
IDLE: begin
if (launch) begin
state_n = READ_REQ;
- end
+ end
end
READ_REQ: begin
READ_DATA: begin
if (mem_rd_valid) begin
state_n = WRITE_REQ;
- end else begin
+ end else begin
state_n = READ_DATA;
- end
+ end
end
WRITE_REQ: begin
WRITE_DATA: begin
if (cnt == (length - 1'b1)) begin
state_n = IDLE;
- end else begin
+ end else begin
state_n = READ_REQ;
- end
+ end
end
default: begin
endcase
end
+ logic last;
+ assign last = (state_r == WRITE_DATA) & (cnt == (length - 1'b1));
+
+ // cycle counter
+ logic [HOST_DATA_BITS-1:0] cycle_counter;
+ always_ff @(posedge clock) begin
+ if (reset | state_r == IDLE) begin
+ cycle_counter <= '0;
+ end else begin
+ cycle_counter <= cycle_counter + 1'b1;
+ end
+ end
+
+ assign event_counter_valid = last;
+ assign event_counter_value = cycle_counter;
+
// calculate next address
always_ff @(posedge clock) begin
if (reset | state_r == IDLE) begin
// read
always_ff @(posedge clock) begin
if ((state_r == READ_DATA) & mem_rd_valid) begin
- data <= mem_rd_bits + 1'b1;
+ data <= mem_rd_bits + {32'd0, constant};
end
end
assign mem_rd_ready = state_r == READ_DATA;
end
// done when read/write are equal to length
- assign finish = (state_r == WRITE_DATA) & (cnt == (length - 1'b1));
+ assign finish = last;
endmodule
* Register description | addr
* -------------------------|-----
* Control status register | 0x00
- * Length value register | 0x04
- * Input pointer lsb | 0x08
- * Input pointer msb | 0x0c
- * Output pointer lsb | 0x10
- * Output pointer msb | 0x14
+ * Cycle counter | 0x04
+ * Constant value | 0x08
+ * Vector length | 0x0c
+ * Input pointer lsb | 0x10
+ * Input pointer msb | 0x14
+ * Output pointer lsb | 0x18
+ * Output pointer msb | 0x1c
* -------------------------------
* ------------------------------
output launch,
input finish,
+
+ input event_counter_valid,
+ input [HOST_DATA_BITS-1:0] event_counter_value,
+
+ output [HOST_DATA_BITS-1:0] constant,
output [HOST_DATA_BITS-1:0] length,
output [MEM_ADDR_BITS-1:0] inp_baddr,
output [MEM_ADDR_BITS-1:0] out_baddr
);
+ localparam NUM_REG = 8;
+
typedef enum logic {IDLE, READ} state_t;
state_t state_n, state_r;
IDLE: begin
if (host_req_valid & ~host_req_opcode) begin
state_n = READ;
- end
+ end
end
READ: begin
assign host_req_deq = (state_r == IDLE) ? host_req_valid : 1'b0;
- logic [HOST_DATA_BITS-1:0] rf [5:0];
+ logic [HOST_DATA_BITS-1:0] rf [NUM_REG-1:0];
genvar i;
- for (i = 0; i < 6; i++) begin
+ for (i = 0; i < NUM_REG; i++) begin
+
logic wen = (state_r == IDLE)? host_req_valid & host_req_opcode & i*4 == host_req_addr : 1'b0;
+
if (i == 0) begin
+
always_ff @(posedge clock) begin
if (reset) begin
- end else if (finish) begin
- rf[i] <= 'd2;
- end else if (wen) begin
- rf[i] <= host_req_value;
- end
+ rf[i] <= 'd0;
+ end else if (finish) begin
+ rf[i] <= 'd2;
+ end else if (wen) begin
+ rf[i] <= host_req_value;
+ end
end
+
+ end else if (i == 1) begin
+
+ always_ff @(posedge clock) begin
+ if (reset) begin
+ rf[i] <= 'd0;
+ end else if (event_counter_valid) begin
+ rf[i] <= event_counter_value;
+ end else if (wen) begin
+ rf[i] <= host_req_value;
+ end
+ end
+
end else begin
+
always_ff @(posedge clock) begin
if (reset) begin
- end else if (wen) begin
- rf[i] <= host_req_value;
- end
+ rf[i] <= 'd0;
+ end else if (wen) begin
+ rf[i] <= host_req_value;
+ end
end
+
end
+
end
logic [HOST_DATA_BITS-1:0] rdata;
rdata <= rf[4];
end else if (host_req_addr == 'h14) begin
rdata <= rf[5];
+ end else if (host_req_addr == 'h18) begin
+ rdata <= rf[6];
+ end else if (host_req_addr == 'h1c) begin
+ rdata <= rf[7];
end else begin
rdata <= 'd0;
end
assign host_resp_bits = rdata;
assign launch = rf[0][0];
- assign length = rf[1];
- assign inp_baddr = {rf[3], rf[2]};
- assign out_baddr = {rf[5], rf[4]};
+ assign constant = rf[2];
+ assign length = rf[3];
+ assign inp_baddr = {rf[5], rf[4]};
+ assign out_baddr = {rf[7], rf[6]};
endmodule
import tvm
import ctypes
-import json
import os.path as osp
from sys import platform
-def driver(hw_lib, sw_lib):
- """Init hardware and software shared library for add-by-one accelerator
+def driver(hw_backend):
+ """Init hardware and software shared library for accelerator
Parameters
------------
- hw_lib : str
- Name of hardware shared library
+ hw_backend : str
+ Hardware backend can be verilog or chisel
- sw_lib : str
- Name of software shared library
"""
+ _ext = ".dylib" if platform == "darwin" else ".so"
+ _hw_libname = "libhw" + _ext
+ _sw_libname = "libsw" + _ext
_cur_path = osp.dirname(osp.abspath(osp.expanduser(__file__)))
- _root_path = osp.join(_cur_path, "..", "..")
- _cfg_file = osp.join(_root_path, "config", "config.json")
- _cfg = json.load(open(_cfg_file))
- if not hw_lib.endswith(("dylib", "so")):
- hw_lib += ".dylib" if platform == "darwin" else ".so"
- if not sw_lib.endswith(("dylib", "so")):
- sw_lib += ".dylib" if platform == "darwin" else ".so"
- _hw_lib = osp.join(_root_path, _cfg['BUILD_NAME'], hw_lib)
- _sw_lib = osp.join(_root_path, _cfg['BUILD_NAME'], sw_lib)
+ if hw_backend in ("verilog", "chisel"):
+ _hw_lib = osp.join(_cur_path, "..", "..", "hardware", hw_backend, "build", _hw_libname)
+ _sw_lib = osp.join(_cur_path, "..", "..", "build", _sw_libname)
def load_dll(dll):
try:
except OSError:
return []
- def run(a, b):
+ def run(a, b, c):
load_dll(_sw_lib)
f = tvm.get_global_func("tvm.vta.driver")
m = tvm.module.load(_hw_lib, "vta-tsim")
- f(m, a, b)
+ return f(m, a, b, c)
return run
module.operator->());
}
- int Run(uint32_t length, void* inp, void* out) {
- uint32_t wait_cycles = 100000000;
- this->Launch(wait_cycles, length, inp, out);
- this->WaitForCompletion(wait_cycles);
+ uint32_t Run(uint32_t c, uint32_t length, void* inp, void* out) {
+ uint32_t cycles;
+ this->Launch(c, length, inp, out);
+ cycles = this->WaitForCompletion();
dpi_->Finish();
- return 0;
+ return cycles;
}
private:
- void Launch(uint32_t wait_cycles, uint32_t length, void* inp, void* out) {
- dpi_->Launch(wait_cycles);
- // write registers
- dpi_->WriteReg(0x04, length);
- dpi_->WriteReg(0x08, get_half_addr(inp, false));
- dpi_->WriteReg(0x0c, get_half_addr(inp, true));
- dpi_->WriteReg(0x10, get_half_addr(out, false));
- dpi_->WriteReg(0x14, get_half_addr(out, true));
- dpi_->WriteReg(0x00, 0x1); // launch
+ void Launch(uint32_t c, uint32_t length, void* inp, void* out) {
+ dpi_->Launch(wait_cycles_);
+ // set counter to zero
+ dpi_->WriteReg(0x04, 0);
+ dpi_->WriteReg(0x08, c);
+ dpi_->WriteReg(0x0c, length);
+ dpi_->WriteReg(0x10, get_half_addr(inp, false));
+ dpi_->WriteReg(0x14, get_half_addr(inp, true));
+ dpi_->WriteReg(0x18, get_half_addr(out, false));
+ dpi_->WriteReg(0x1c, get_half_addr(out, true));
+ // launch
+ dpi_->WriteReg(0x00, 0x1);
}
- void WaitForCompletion(uint32_t wait_cycles) {
+ uint32_t WaitForCompletion() {
uint32_t i, val;
- for (i = 0; i < wait_cycles; i++) {
+ for (i = 0; i < wait_cycles_; i++) {
val = dpi_->ReadReg(0x00);
- if (val == 2) break; // finish
+ if (val == 2) break; // finish
}
+ val = dpi_->ReadReg(0x04);
+ return val;
}
+ uint32_t wait_cycles_{100000000};
DPIModuleNode* dpi_;
Module module_;
};
DLTensor* A = args[1];
DLTensor* B = args[2];
Device dev_(dev_mod);
- dev_.Run(A->shape[0], A->data, B->data);
+ uint32_t cycles = dev_.Run(static_cast<int>(args[3]), A->shape[0], A->data, B->data);
+ *rv = static_cast<int>(cycles);
});
} // namespace driver
import tvm
import numpy as np
-from tsim.driver import driver
+from accel.driver import driver
-def test_tsim(i):
- rmin = 1 # min vector size of 1
+def test_accel():
rmax = 64
- n = np.random.randint(rmin, rmax)
+ n = np.random.randint(1, rmax)
+ c = np.random.randint(0, rmax)
ctx = tvm.cpu(0)
a = tvm.nd.array(np.random.randint(rmax, size=n).astype("uint64"), ctx)
b = tvm.nd.array(np.zeros(n).astype("uint64"), ctx)
- f = driver("libhw", "libsw")
- f(a, b)
- emsg = "[FAIL] test number:{} n:{}".format(i, n)
- np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1, err_msg=emsg)
- print("[PASS] test number:{} n:{}".format(i, n))
+ f = driver("chisel")
+ cycles = f(a, b, c)
+ msg = "cycles:{0:4} n:{1:2} c:{2:2}".format(cycles, n, c)
+ np.testing.assert_equal(b.asnumpy(), a.asnumpy() + c, err_msg = "[FAIL] " + msg)
+ print("[PASS] " + msg)
if __name__ == "__main__":
- times = 10
- for i in range(times):
- test_tsim(i)
+ for i in range(10):
+ test_accel()
# specific language governing permissions and limitations
# under the License.
-file(GLOB TSIM_SW_SRC src/driver.cc)
-add_library(sw SHARED ${TSIM_SW_SRC})
-target_include_directories(sw PRIVATE ${VTA_DIR}/include)
+import tvm
+import numpy as np
-if(APPLE)
- set_target_properties(sw PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
-endif(APPLE)
+from accel.driver import driver
+
+def test_accel():
+ rmax = 64
+ n = np.random.randint(1, rmax)
+ c = np.random.randint(0, rmax)
+ ctx = tvm.cpu(0)
+ a = tvm.nd.array(np.random.randint(rmax, size=n).astype("uint64"), ctx)
+ b = tvm.nd.array(np.zeros(n).astype("uint64"), ctx)
+ f = driver("verilog")
+ cycles = f(a, b, c)
+ msg = "cycles:{0:4} n:{1:2} c:{2:2}".format(cycles, n, c)
+ np.testing.assert_equal(b.asnumpy(), a.asnumpy() + c, err_msg = "[FAIL] " + msg)
+ print("[PASS] " + msg)
+
+if __name__ == "__main__":
+ for i in range(10):
+ test_accel()
always_ff @(posedge clock) begin
if (__exit == 'd1) begin
- $display("[TSIM] Verilog $finish called at cycle:%016d", cycles);
$finish;
end
end
// VL_USER_FINISH needs to be defined when compiling Verilator code
void vl_finish(const char* filename, int linenum, const char* hier) {
Verilated::gotFinish(true);
- VL_PRINTF("[TSIM] exiting simulation\n");
}
int VTADPISim(uint64_t max_cycles) {