| |-- apptests Source files for App Tests
| |-- utils Source files for Test Utils
| `-- models NPU Model binaries from second/third-party repos
-|-- tools/ Useful tools/scripts (e.g., testdata generator)
+|-- tools/ Useful tools or scripts
|-- packaging/ Tizen Packaging Files (i.e., FastModel Simulator)
|-- debian/ Debian Packaging Files (i.e., Ubuntu 16.04)
`-- doc/ Documentation
- How to use the NPU Engine library in user apps: press [here](/doc/how_to_use.md)
- Usage examples using the NPU Engine library: press [here](/doc/usage_examples.md)
+## Related Repositories
+- NPU SystemService's testdata: press [here](https://github.sec.samsung.net/AIP/NPU_SystemService_Testdata)
+- NPU SystemService's NPU emulator: press [here](https://github.sec.samsung.net/AIP/NPU_SystemService_Emulator)
+
## Reference Links
- Software Stack: http://suprem.sec.samsung.net/confluence/display/ODLC/NPU+OS+Stack
- CI Server: http://nnsuiteci.mooo.com/NPU_SystemService/ci/taos/
Build-Depends: ninja-build, meson (>=0.50), debhelper (>=9),
gcc-9 | gcc-8 | gcc-7 | gcc-6 | gcc-5, libgtest-dev, python,
libdrm-dev, libiniparser-dev, pkg-config, cmake, linux-fvp-headers,
- libnpuvision-dev, libnpuvision-testdata, npu-decoder-testdata, npu-encoder
+ npu-engine-emul, npu-engine-testdata
Standards-Version: 3.8.2
Homepage: https://research.samsung.com
Package: npu-engine
Architecture: amd64
Multi-Arch: same
-Depends: ${shlibs:Depends}, ${misc:Depends}
+Depends: npu-engine-emul, ${shlibs:Depends}, ${misc:Depends}
Description: NPU Engine
This provides in-NPU software control daemon, NPU-Engine.
Recommends: npu-engine-testdata
Description: NPU Engine Example Package
Example application package for NPU Engine, including UnitTests and AppTest with realistic scenarios (TBD).
-
-Package: npu-engine-testdata
-Architecture: amd64
-Multi-Arch: same
-Depends: ${shlibs:Depends}, ${misc:Depends}
-Description: Test data for verfication of NPU Engine
- This package provides test data for verfication of NPU Engine.
- Note that the npu-example package (especailly, the application tests) requires this package.
+++ /dev/null
-/opt/trinity/share/npu-engine/testdata/*
ROOT_DIR:=$(shell pwd)
export NPU_TRINITY_INSTALL_PREFIX=/opt/trinity
+export NPU_TESTDATA_PATH=${NPU_TRINITY_INSTALL_PREFIX}/share/npu-engine/testdata
export PATH=$(shell printenv PATH):${NPU_TRINITY_INSTALL_PREFIX}/bin
export LC_ALL=C.UTF-8
./build/tests/apptests/apptest_dmabuf_model
./build/tests/apptests/apptest_dmabuf_buffers
./build/tests/apptests/apptest_async_callbacks
-
- # Gen model binaries
- # 1) Singl-layer models from AIP/SIM_Trinity_SIM (i.e., libnpuvision)
- # 2) Multi-layer models from AIP/NPU_SystemService
- # 3) MUlti-layer models from AIP/NPU_Compiler
-
- mkdir -p $(CURDIR)/npubinfmt_v1
- mkdir -p $(CURDIR)/npubinfmt_v2
-
- # 1) Reuse existing testdata but need to make npu models (v1)
- find /opt/trinity/share/npuvision/testdata/* -type d -exec basename {} \; |\
- xargs -I{} mkdir -p $(CURDIR)/npubinfmt_v1/{}
- find /opt/trinity/share/npuvision/testdata/* -type d -exec basename {} \; |\
- xargs -I{} $(ROOT_DIR)/tools/gen-testdata/gen_npu_model.py -s -o $(CURDIR)/npubinfmt_v1/{} \
- /opt/trinity/share/npuvision/testdata/{} 1
- find /opt/trinity/share/npuvision/testdata/* -type d -exec basename {} \; |\
- xargs -I{} $(ROOT_DIR)/build/tools/gen-testdata/gen_ref_data -o $(CURDIR)/npubinfmt_v1/{} \
- $(CURDIR)/npubinfmt_v1/{}/model.tvn > /dev/null
-
- # 2) Make npu models from scratch (v1/v2)
- $(ROOT_DIR)/build/tools/gen-testdata/gen_visa_prog $(CURDIR)/npubinfmt_v1 > /dev/null
- find $(CURDIR)/npubinfmt_v1 -name 'testcase*' -type d -exec basename {} \; | \
- xargs -I{} cp -r $(CURDIR)/npubinfmt_v1/{} $(CURDIR)/npubinfmt_v2/{}
-
- find $(CURDIR)/npubinfmt_v1 -name 'testcase*' -type d -exec basename {} \; |\
- xargs -I{} $(ROOT_DIR)/tools/gen-testdata/gen_npu_model.py -o $(CURDIR)/npubinfmt_v1/{} \
- $(CURDIR)/npubinfmt_v1/{} 1
- find $(CURDIR)/npubinfmt_v2 -name 'testcase*' -type d -exec basename {} \; |\
- xargs -I{} $(ROOT_DIR)/tools/gen-testdata/gen_npu_model.py -o $(CURDIR)/npubinfmt_v2/{} \
- $(CURDIR)/npubinfmt_v2/{} 2
-
- find $(CURDIR)/npubinfmt_v1 -name 'testcase*' -type d -exec basename {} \; |\
- xargs -I{} $(ROOT_DIR)/build/tools/gen-testdata/gen_ref_data -o $(CURDIR)/npubinfmt_v1/{} \
- $(CURDIR)/npubinfmt_v1/{}/model.tvn > /dev/null
- find $(CURDIR)/npubinfmt_v2 -name 'testcase*' -type d -exec basename {} \; |\
- xargs -I{} $(ROOT_DIR)/build/tools/gen-testdata/gen_ref_data -o $(CURDIR)/npubinfmt_v2/{} \
- $(CURDIR)/npubinfmt_v2/{}/model.tvn > /dev/null
-
- # 3) Make only reference input/output data (v1 currently)
- tar zxf $(ROOT_DIR)/tests/models/tvn_models.tar.gz
-
- find $(CURDIR)/tvn_models/* -type d -exec basename {} \; |\
- xargs -I{} $(ROOT_DIR)/build/tools/gen-testdata/gen_ref_data -o $(CURDIR)/tvn_models/{} \
- $(CURDIR)/tvn_models/{}/model.tvn > /dev/null
- find $(CURDIR)/tvn_models/* -type d -exec basename {} \; |\
- xargs -I{} mv $(CURDIR)/tvn_models/{} $(CURDIR)/npubinfmt_v1
-
- # Cleanup
- rm -rf $(CURDIR)/tvn_models
- find $(CURDIR)/npubinfmt_v1 -type f -name 'program.*' -delete
- find $(CURDIR)/npubinfmt_v2 -type f -name 'program.*' -delete
- find $(CURDIR)/npubinfmt_v1 -type f -name 'input_weight*' -delete
- find $(CURDIR)/npubinfmt_v2 -type f -name 'input_weight*' -delete
-
# Test the models
- ./build/tests/apptests/apptest_example_visa $(CURDIR)/npubinfmt_v1 > /dev/null
- ./build/tests/apptests/apptest_example_visa $(CURDIR)/npubinfmt_v2 > /dev/null
+ ./build/tests/apptests/apptest_example_visa $(NPU_TESTDATA_PATH)/npubinfmt_v1 > /dev/null
+ ./build/tests/apptests/apptest_example_visa $(NPU_TESTDATA_PATH)/npubinfmt_v2 > /dev/null
override_dh_auto_install:
DESTDIR=$(CURDIR)/debian/tmp ninja -C build install
- mkdir -p debian/tmp/${NPU_TRINITY_INSTALL_PREFIX}/share/npu-engine/testdata
- mv npubinfmt_v1 debian/tmp/${NPU_TRINITY_INSTALL_PREFIX}/share/npu-engine/testdata
- mv npubinfmt_v2 debian/tmp/${NPU_TRINITY_INSTALL_PREFIX}/share/npu-engine/testdata
override_dh_install:
dh_install --sourcedir=debian/tmp --list-missing
subdir('src')
subdir('tests')
-subdir('tools')
# Set configuration to install .ini
ne_install_conf = configuration_data()
-%define neexampledir %{_libdir}/npu-engine/bin
-%define testdatadir_npuvision %{_datadir}/npuvision/testdata
-%define testdatadir_out %{_datadir}/npu-engine/testdata
+%define neexampledir %{_libdir}/npu-engine/bin
Name: npu-engine
Summary: NPU Engine
# gtest
BuildRequires: gtest-devel
-BuildRequires: libnpuvision-devel
-%ifarch aarch64 x86_64
-# for testdata generation
-BuildRequires: libnpuvision-testdata
-BuildRequires: npu-encoder
-BuildRequires: npu-encoder-testdata
-BuildRequires: python
-%endif
-
# test coverage dependency
%if 0%{?test_coverage}
BuildRequires: lcov
# NPU emulation dependency (used in unittests only)
%if 0%{?unit_test}
%define enable_npu_emul true
+BuildRequires: npu-engine-emul
+
+%ifarch x86_64
+%define testdatadir %{_datadir}/npu-engine/testdata
+BuildRequires: npu-engine-testdata
+%endif
# valgrind requires the same arch with host (x86_64)
%if 0%{?use_valgrind}
# Install Core (NPU-Engine)
DESTDIR=%{buildroot} ninja install -C build %{?_smp_mflags}
-%ifarch aarch64 x86_64
-
-# Install Test Data
-
-# Gen model binaries
-# 1) Singl-layer models from AIP/SIM_Trinity_SIM (i.e., libnpuvision)
-# 2) Multi-layer models from AIP/NPU_SystemService
-# 3) MUlti-layer models from AIP/NPU_Compiler
-
-mkdir -p npubinfmt_v1
-mkdir -p npubinfmt_v2
-
-# 1) Reuse existing testdata but need to make npu models (v1)
-find %{testdatadir_npuvision} -name 'core*' -type d -exec basename {} \; |\
- xargs -I{} mkdir -p npubinfmt_v1/{}
-
-find %{testdatadir_npuvision} -name 'core*' -type d -exec basename {} \; |\
- xargs -I{} ./tools/gen-testdata/gen_npu_model.py -s -o npubinfmt_v1/{} \
- %{testdatadir_npuvision}/{} 1
-
-find %{testdatadir_npuvision} -name 'core*' -type d -exec basename {} \; |\
- xargs -I{} ./build/tools/gen-testdata/gen_ref_data -o npubinfmt_v1/{} \
- npubinfmt_v1/{}/model.tvn > /dev/null
-
-# 2) Make npu models from scratch (v1/v2)
-./build/tools/gen-testdata/gen_visa_prog npubinfmt_v1 > /dev/null
-find npubinfmt_v1 -name 'testcase*' -type d -exec basename {} \; |\
- xargs -I{} cp -r npubinfmt_v1/{} npubinfmt_v2/{}
-
-find npubinfmt_v1 -name 'testcase*' -type d -exec basename {} \; |\
- xargs -I{} ./tools/gen-testdata/gen_npu_model.py -o npubinfmt_v1/{} npubinfmt_v1/{} 1
-find npubinfmt_v2 -name 'testcase*' -type d -exec basename {} \; |\
- xargs -I{} ./tools/gen-testdata/gen_npu_model.py -o npubinfmt_v2/{} npubinfmt_v2/{} 2
-
-find npubinfmt_v1 -name 'testcase*' -type d -exec basename {} \; |\
- xargs -I{} ./build/tools/gen-testdata/gen_ref_data -o npubinfmt_v1/{} \
- npubinfmt_v1/{}/model.tvn > /dev/null
-
-find npubinfmt_v2 -name 'testcase*' -type d -exec basename {} \; |\
- xargs -I{} ./build/tools/gen-testdata/gen_ref_data -o npubinfmt_v2/{} \
- npubinfmt_v2/{}/model.tvn > /dev/null
-
-# 3) Make only reference input/output data (v1)
-tar zxf tests/models/tvn_models.tar.gz
-find tvn_models/* -type d -exec basename {} \; | xargs -I{} mkdir -p npubinfmt_v1/{}
-find tvn_models/* -type d -exec basename {} \; |\
- xargs -I{} ./build/tools/gen-testdata/gen_ref_data -o npubinfmt_v1/{} \
- tvn_models/{}/model.tvn > /dev/null
-find tvn_models/* -type d -exec basename {} \; |\
- xargs -I{} mv tvn_models/{}/model.tvn npubinfmt_v1/{}/
-
-# Cleanup
-rm -rf tvn_models
-find npubinfmt_v1 -type f -name 'program.*' -delete
-find npubinfmt_v2 -type f -name 'program.*' -delete
-find npubinfmt_v1 -type f -name 'input_weight*' -delete
-find npubinfmt_v2 -type f -name 'input_weight*' -delete
-
-mkdir -p %{buildroot}%{testdatadir_out}
-mv npubinfmt_v1 %{buildroot}%{testdatadir_out}
-mv npubinfmt_v2 %{buildroot}%{testdatadir_out}
-
-%endif
-
# Perform unit tests
%check
%if 0%{?unit_test}
# Run apptests using actual model files.
%ifarch x86_64
- ./apptests/apptest_example_visa %{buildroot}%{testdatadir_out}/npubinfmt_v1 > /dev/null
- ./apptests/apptest_example_visa %{buildroot}%{testdatadir_out}/npubinfmt_v2 > /dev/null
+ # Run against the test data installed by the npu-engine-testdata build dependency.
+ # testdatadir is derived from _datadir and is already absolute, so no leading '/' is added.
+ ./apptests/apptest_example_visa %{testdatadir}/npubinfmt_v1 > /dev/null
+ ./apptests/apptest_example_visa %{testdatadir}/npubinfmt_v2 > /dev/null
%endif
popd
%package example
Summary: NPU Engine Example Package
-%description example
-Example application package for NPU Engine, including UnitTests and AppTest with realistic scenarios (TBD).
Requires: npu-engine = %{version}-%{release}
-Requires: npu-testdata = %{version}-%{release}
+%description example
+Example application package for NPU Engine, including unit tests and application tests with realistic scenarios.
%files example
%%defattr(-,root,root,-)
%{neexampledir}/unittests/*
%{neexampledir}/apptests/*
-%ifarch aarch64 x86_64
-%package testdata
-Summary: Test data for verfication of NPU Engine
-%description testdata
-This package provides test data for verfication of NPU Engine.
-Note that the npu-example package (especailly, the application tests) requires this package.
-%files testdata
-%%defattr(-,root,root,-)
-%{_datadir}/npu-engine/testdata/npubinfmt_v1/*
-%{_datadir}/npu-engine/testdata/npubinfmt_v2/*
-%endif
-
%package unittest-coverage
Summary: NPU Engine UnitTest Coverage Analysis Result
%description unittest-coverage
+++ /dev/null
-/**
- * Proprietary
- * Copyright (C) 2019 Samsung Electronics
- * Copyright (C) 2019 Dongju Chae <dongju.chae@samsung.com>
- */
-/**
- * @file NPUcore.cpp
- * @date 19 Nov 2019
- * @brief Implementation of NPU emulation for VISA operations
- * @author Dongju Chae <dongju.chae@samsung.com>
- * @bug No known bugs except for NYI items
- *
- * To packagers: this is used by NPU Engine (NE).
- */
-
-#include <sys/mman.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-#include <string.h>
-#include <errno.h>
-
-#include <iostream>
-
-#include "NPUemul.h"
-
-#define PAGE_SIZE 4096
-
-using namespace std;
-using namespace trinity_vision;
-
-/** @brief run inference using emulation */
-int NPUCoreEmul::run (char *model, char *input) {
- if (!model || !input)
- return -1;
-
- model_base = model;
- buffer_base = input;
-
- memcpy (&meta, model_base, NPUBIN_META_SIZE);
- program_base = model_base + NPUBIN_META_SIZE;
- weight_base = program_base + meta.program_size;
-
- return run_inference ();
-}
-
-/** @brief get the current opcode */
-visa_opcode NPUCoreEmul::get_opcode () {
- return static_cast<visa_opcode>(PC[0]);
-}
-
-/** @brief dump the output data to file and read again to memeory */
-void NPUCoreEmul::dump_fmap_out (uint32_t out_addr) {
- Point1D<int8_t, 8> data_mem;
-
- /** TrinityCore provides API to dump data to a file only */
- trinity_fmap_out.write_data_file("/tmp", "output_fmap", true, true, true);
-
- data_mem.clean();
- if (read_data_file("/tmp", "output_fmap", data_mem)) {
- memcpy(buffer_base + out_addr,
- data_mem.get_p_data(),
- data_mem.get_mem_size());
- }
-}
-
-/** @brief run ESUM (elementwise sum) op */
-void NPUCoreEmul::run_esum_op (TRINITY_CORE_PARA_OP& op) {
- esum_fields esum;
-
- memcpy(&esum, PC, sizeof(esum_fields));
-
- op.info.OPCODE = 7;
- op.cfg.WGT_QBIT = 1;
- op.cfg.FLT_H = 1;
- op.cfg.FLT_W = 1;
- op.data_size.OUT_H = esum.out_h_m1 + 1;
- op.data_size.OUT_W = esum.out_w_m1 + 1;
- op.data_size.OUT_D = esum.out_d_m1 + 1;
- op.data_size.IN0_D = esum.out_d_m1 + 1;
- op.quant.IN0_ZERO = esum.in0_zero;
- op.quant.IN1_ZERO = esum.in1_zero;
- op.quant.OUT_ZERO = esum.out_zero;
- op.quant.OUT_LSHAMT = esum.out_shamt;
- op.quant.IN0_LSHAMT = 20;
- op.quant.IN0_RSHAMT = esum.in0_shamt;
- op.quant.IN1_LSHAMT = 20;
- op.quant.IN1_RSHAMT = esum.in1_shamt;
- op.quant.IN0_MULT = esum.in0_mult;
- op.quant.IN1_MULT = esum.in1_mult;
- op.quant.OUT_MULT = esum.out_mult;
-
- if (alloc_trinity_data (op, esum.in0_eaddr0, esum.in1_eaddr0, -1)) {
- dump_fmap_out(esum.out_eaddr0);
- }
-
- PC += sizeof(esum_fields);
-}
-
-/** @brief run MAXP (max pooling) op */
-void NPUCoreEmul::run_maxp_op (TRINITY_CORE_PARA_OP& op) {
- maxp_fields maxp;
-
- memcpy(&maxp, PC, sizeof(maxp_fields));
-
- op.info.OPCODE = 4;
- op.cfg.WGT_QBIT = 1;
- op.cfg.FLT_H = maxp.flt_h_m1 + 1;
- op.cfg.FLT_W = maxp.flt_w_m1 + 1;
- op.cfg.STR_Y = maxp.str_y_m1 + 1;
- op.cfg.STR_X = maxp.str_x_m1 + 1;
- op.cfg.PAD_L = maxp.pad_l;
- op.cfg.PAD_R = maxp.pad_r;
- op.cfg.PAD_T = maxp.pad_t;
- op.cfg.PAD_B = maxp.pad_b;
- op.data_size.OUT_H = maxp.out_h_m1 + 1;
- op.data_size.OUT_W = maxp.out_w_m1 + 1;
- op.data_size.OUT_D = maxp.out_d_m1 + 1;
- op.data_size.IN0_D = maxp.out_d_m1 + 1;
- op.quant.IN0_ZERO = 0;
- op.quant.OUT_ZERO = 0;
-
- if (alloc_trinity_data (op, maxp.in0_eaddr0, -1, -1)) {
- dump_fmap_out(maxp.out_eaddr0);
- }
-
- PC += sizeof(maxp_fields);
-}
-
-/** @brief run CONV (normal convolution) op */
-void NPUCoreEmul::run_conv_op (TRINITY_CORE_PARA_OP& op) {
- conv_fields conv;
-
- memcpy(&conv, PC, sizeof(conv_fields));
-
- op.info.OPCODE = 0;
- op.info.CNV_RELU_EN = conv.relu_en;
- op.cfg.CNV_BIAS_EN = conv.bias_en;
- op.cfg.WGT_QBIT = conv.wgt_q + 1;
- op.cfg.FLT_H = conv.flt_h_m1 + 1;
- op.cfg.FLT_W = conv.flt_w_m1 + 1;
- op.cfg.STR_Y = conv.str_y_m1 + 1;
- op.cfg.STR_X = conv.str_x_m1 + 1;
- op.cfg.PAD_L = conv.pad_l;
- op.cfg.PAD_R = conv.pad_r;
- op.cfg.PAD_T = conv.pad_t;
- op.cfg.PAD_B = conv.pad_b;
- op.data_size.OUT_H = conv.out_h_m1 + 1;
- op.data_size.OUT_W = conv.out_w_m1 + 1;
- op.data_size.OUT_D = conv.out_d_m1 + 1;
- op.data_size.IN0_D = conv.in0_d_m1 + 1;
- op.quant.IN0_ZERO = conv.in0_zero;
- op.quant.OUT_ZERO = conv.out_zero;
- op.quant.OUT_LSHAMT = conv.out_shamt;
- op.quant.OUT_MULT = conv.out_mult;
-
- /** The OPCODE can be different depending on input depth
- * (@todo we need to define enum instead of a raw integer */
- if (op.data_size.IN0_D <= 3)
- op.info.OPCODE = 1;
-
- if (alloc_trinity_data (op, conv.in0_eaddr0, -1, conv.wgt_eaddr0)) {
- dump_fmap_out(conv.out_eaddr0);
- }
-
- PC += sizeof(conv_fields);
-}
-
-/** @brief run ReLU op */
-void NPUCoreEmul::run_relu_op (TRINITY_CORE_PARA_OP& op) {
- relu_fields relu;
-
- memcpy(&relu, PC, sizeof(relu));
-
- op.info.OPCODE = 6;
- op.cfg.WGT_QBIT = 1;
- op.cfg.FLT_H = 1;
- op.cfg.FLT_W = 1;
-
- op.data_size.OUT_H = relu.out_h_m1 + 1;
- op.data_size.OUT_W = relu.out_w_m1 + 1;
- op.data_size.OUT_D = relu.out_d_m1 + 1;
- op.data_size.IN0_D = relu.out_d_m1 + 1;
-
- op.quant.IN0_ZERO = relu.in0_zero;
- op.quant.OUT_ZERO = relu.out_zero;
-
- if (alloc_trinity_data (op, relu.in0_eaddr0, -1, -1)) {
- dump_fmap_out(relu.out_eaddr0);
- }
-
- PC += sizeof(relu);
-}
-
-/** @brief run AVGP (average pooling) op */
-void NPUCoreEmul::run_avgp_op (TRINITY_CORE_PARA_OP& op) {
- static const int AVGP_RSHAMTS[17] = {
- 0, 0, 0, 1,
- 1, 0, 2, 0,
- 2, 3, 0, 0,
- 3, 0, 0, 0,
- 3};
- static const int AVGP_MULTS[17] = {
- 0, 0, 1073741824, 1431655765,
- 1073741824, 0, 1431655765, 0,
- 1073741824, 1908874354, 0, 0,
- 1431655765, 0, 0, 0,
- 1073741824};
- avgp_fields avgp;
-
- memcpy(&avgp, PC, sizeof(avgp));
- op.info.OPCODE = 5;
- op.cfg.WGT_QBIT = 1;
-
- op.cfg.FLT_H = avgp.flt_h_m1 + 1;
- op.cfg.FLT_W = avgp.flt_w_m1 + 1;
- op.cfg.STR_Y = avgp.str_y_m1 + 1;
- op.cfg.STR_X = avgp.str_x_m1 + 1;
- op.cfg.PAD_L = avgp.pad_l;
- op.cfg.PAD_R = avgp.pad_r;
- op.cfg.PAD_T = avgp.pad_t;
- op.cfg.PAD_B = avgp.pad_b;
-
- op.data_size.OUT_H = avgp.out_h_m1 + 1;
- op.data_size.OUT_W = avgp.out_w_m1 + 1;
- op.data_size.OUT_D = avgp.out_d_m1 + 1;
- op.data_size.IN0_D = avgp.out_d_m1 + 1;
-
- op.quant.IN0_ZERO = 0;
- op.quant.OUT_ZERO = 0;
-
- op.quant.OUT_RSHAMT = AVGP_RSHAMTS[op.cfg.FLT_H * op.cfg.FLT_W];
- op.quant.OUT_MULT = AVGP_MULTS[op.cfg.FLT_H * op.cfg.FLT_W];
- op.quant.OUT_LSHAMT = 0;
-
- if (alloc_trinity_data (op, avgp.in0_eaddr0, -1, -1)) {
- dump_fmap_out(avgp.out_eaddr0);
- }
-
- PC += sizeof(avgp);
-}
-
-/** @brief run CONVE (convolution and elementwise sum) op */
-void NPUCoreEmul::run_conve_op (TRINITY_CORE_PARA_OP& op) {
- conve_fields conve;
-
- memcpy(&conve, PC, sizeof(conve));
-
- op.info.OPCODE = 0;
- op.info.CNV_ESUM_EN = 1;
- op.info.CNV_RELU_EN = conve.relu_en;
- op.cfg.CNV_BIAS_EN = conve.bias_en;
- op.cfg.WGT_QBIT = conve.wgt_q + 1;
- op.cfg.FLT_H = conve.flt_h_m1 + 1;
- op.cfg.FLT_W = conve.flt_w_m1 + 1;
- op.cfg.STR_Y = conve.str_y_m1 + 1;
- op.cfg.STR_X = conve.str_x_m1 + 1;
- op.cfg.PAD_L = conve.pad_l;
- op.cfg.PAD_R = conve.pad_r;
- op.cfg.PAD_T = conve.pad_t;
- op.cfg.PAD_B = conve.pad_b;
-
- op.data_size.OUT_H = conve.out_h_m1 + 1;
- op.data_size.OUT_W = conve.out_w_m1 + 1;
- op.data_size.OUT_D = conve.out_d_m1 + 1;
- op.data_size.IN0_D = conve.in0_d_m1 + 1;
-
- op.quant.IN0_ZERO = conve.in0_zero;
- op.quant.IN1_ZERO = conve.in1_zero;
- op.quant.OUT_ZERO = conve.out_zero;
- op.quant.IN0_MULT = conve.in0_mult;
- op.quant.IN1_MULT = conve.in1_mult;
- op.quant.IN1_LSHAMT = 20;
- op.quant.IN1_RSHAMT = conve.in1_shamt;
- op.quant.OUT_MULT = conve.out_mult;
- op.quant.OUT_LSHAMT = conve.out_shamt;
- op.quant.IN0_LSHAMT = conve.in0_shamt;
-
- if (alloc_trinity_data (op, conve.in0_eaddr0, conve.in1_eaddr0, conve.wgt_eaddr0)) {
- dump_fmap_out(conve.out_eaddr0);
- }
-
- PC += sizeof(conve);
-}
-
-/**
- * @brief run TCONV (transposed convolution) op
- * @note this instruction is not supported yet
- */
-void NPUCoreEmul::run_tcnv_op (TRINITY_CORE_PARA_OP& op) {
- tcnv_fields tcnv;
-
- memcpy(&tcnv, PC, sizeof(tcnv));
-
- /* TODO: Fill here */
-
- PC += sizeof(tcnv);
-}
-
-/**
- * @brief run TCONVE (transposed convolution and elementwise sum) op
- * @note this instruction is not supported yet
- */
-void NPUCoreEmul::run_tcnve_op (TRINITY_CORE_PARA_OP& op) {
- tcnve_fields tcnve;
-
- memcpy(&tcnve, PC, sizeof(tcnve));
-
- /* TODO: Fill here */
-
- PC += sizeof(tcnve);
-}
-
-/** @brief allocate data for emulation */
-bool NPUCoreEmul::alloc_trinity_data (TRINITY_CORE_PARA_OP& op,
- int32_t in0_eaddr, int32_t in1_eaddr, int32_t wgt_eaddr) {
- TRINITY_FMAP_PARA para_fmap_in;
- TRINITY_FMAP_PARA para_fmap_out;
- WGT_PARA para_weight;
- SIZE3D size3d_in, size3d_out;
-
- if (!calc_tensor_data_size(op, para_fmap_in, para_weight, para_fmap_out)) {
- cerr << "Fail to parse a trinity op" << endl;
- return false;
- }
-
- size3d_in.depth = para_fmap_in.depth;
- size3d_in.height = para_fmap_in.height;
- size3d_in.width = para_fmap_in.width;
-
- size3d_out.depth = para_fmap_out.depth;
- size3d_out.height = para_fmap_out.height;
- size3d_out.width = para_fmap_out.width;
-
- /** input fmap */
- Point1D<int8_t, 8> input_data;
- int32_t input_size = size3d_in.depth * size3d_in.height * size3d_in.width;
-
- input_data.clean();
- input_data.alloc (input_size);
- memcpy(input_data.get_p_data(), buffer_base + in0_eaddr, input_size);
-
- trinity_fmap_in.alloc(size3d_in);
- trinity_fmap_in.set_data(input_data);
-
- /** input esum (optional) */
- if (in1_eaddr != -1) {
- /** if it's CONVE (not ESUM), the size should be output size */
- if (op.info.OPCODE == 0) {
- int32_t output_size = size3d_out.depth * size3d_out.height * size3d_out.width;
- input_data.alloc (output_size);
- memcpy(input_data.get_p_data(), buffer_base + in1_eaddr, output_size);
- trinity_esum_in.alloc(size3d_out);
- } else {
- input_data.alloc (input_size);
- memcpy(input_data.get_p_data(), buffer_base + in1_eaddr, input_size);
- trinity_esum_in.alloc(size3d_in);
- }
- trinity_esum_in.set_data(input_data);
- }
-
- /** weight (optional) */
- if (wgt_eaddr != -1) {
- Point1D<int8_t, 8> wgt_data;
- int32_t weight_size = para_weight.CACL_SIZE_BYTE;
-
- wgt_data.clean();
- wgt_data.alloc (weight_size);
- memcpy(wgt_data.get_p_data(), weight_base + wgt_eaddr, weight_size);
-
- trinity_weight.alloc(para_weight);
- trinity_weight.set_data(wgt_data, para_fmap_in.depth > 3 ? WBIN_NORMAL : WBIN_NARROW);
- }
-
- /** output fmap */
- trinity_fmap_out.alloc(size3d_out);
-
- do_main_operation(op,
- trinity_fmap_in, trinity_esum_in,
- trinity_weight, trinity_fmap_out);
-
- return true;
-}
-
-/** @brief inference main loop */
-int NPUCoreEmul::run_inference () {
- bool stop = false;
-
- PC = program_base;
-
- while (!stop && PC < program_base + meta.program_size) {
- visa_opcode opcode = get_opcode ();
- TRINITY_CORE_PARA_OP op;
-
- switch (opcode) {
- case VISA_RELU:
- run_relu_op (op);
- break;
- case VISA_AVGP:
- run_avgp_op (op);
- break;
- case VISA_NOP: /** do nothing */
- PC += sizeof(uint32_t);
- break;
- case VISA_SAW: /** program end */
- PC += sizeof(uint32_t);
- stop = true;
- break;
- case VISA_ESUM:
- run_esum_op (op);
- break;
- case VISA_MAXP:
- run_maxp_op (op);
- break;
- default:
- if ((opcode & 0xFE) == VISA_CONV)
- run_conv_op (op);
- else if ((opcode & 0xFE) == VISA_CONVE)
- run_conve_op (op);
- else
- stop = true;
- break;
- }
- }
-
- return 0;
-}
+++ /dev/null
-/**
- * Proprietary
- * Copyright (C) 2019 Samsung Electronics
- * Copyright (C) 2019 Dongju Chae <dongju.chae@samsung.com>
- */
-/**
- * @file NPUemul.h
- * @date 19 Nov 2019
- * @brief Implementation of NPU emulation including ioctl & VISA C emulation
- * @author Dongju Chae <dongju.chae@samsung.com>
- * @bug No known bugs except for NYI items
- */
-
-#ifndef __NPU_CORE_NPUemul_H__
-#define __NPU_CORE_NPUemul_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef enum {
- VISA_NOP = 0x00,
- VISA_SAW = 0x02,
- VISA_ESUM = 0x08,
- VISA_RELU = 0x09,
- VISA_MAXP = 0x0A,
- VISA_AVGP = 0x0B,
- VISA_CONV = 0x10,
- VISA_CONVE = 0x12,
-} visa_opcode;
-
-/** @brief the below defines the VISA-encoded structure for each op */
-
-struct esum_fields {
- /** 0x0 */
- uint8_t opcode;
- uint8_t dma_en : 4;
- uint8_t emr : 2;
- uint8_t : 0;
- uint16_t cmd_id;
- /** 0x4 */
- uint32_t out_zero : 8;
- uint32_t in0_zero : 8;
- uint32_t in1_zero : 8;
- uint32_t : 0;
- /** 0x8 */
- uint8_t out_shamt : 6;
- uint8_t : 0;
- uint8_t in0_shamt : 5;
- uint8_t : 0;
- uint16_t in1_shamt : 5;
- uint16_t : 0;
- /** 0xC */
- uint32_t out_mult;
- uint32_t in0_mult;
- uint32_t in1_mult;
- /** 0x18 */
- uint16_t out_w_m1;
- uint16_t out_h_m1;
- uint32_t out_d_m1 : 16;
- uint32_t : 0;
- /** 0x24 */
- uint32_t out_iaddr0 : 21;
- uint32_t : 0;
- uint32_t out_imod_y : 21;
- uint32_t : 0;
- uint32_t out_imod_z : 21;
- uint32_t : 0;
- uint32_t in0_iaddr0 : 21;
- uint32_t : 0;
- uint32_t in0_imod_y : 21;
- uint32_t : 0;
- uint32_t in0_imod_z : 21;
- uint32_t : 0;
- uint32_t in1_iaddr0 : 21;
- uint32_t : 0;
- uint32_t in1_imod_y : 21;
- uint32_t : 0;
- uint32_t in1_imod_z : 21;
- uint32_t : 0;
- /** 0x44 */
- uint32_t out_eaddr0;
- uint32_t out_emod_y;
- uint32_t out_emod_z;
- uint32_t in0_eaddr0;
- uint32_t in0_emod_y;
- uint32_t in0_emod_z;
- uint32_t in1_eaddr0;
- uint32_t in1_emod_y;
- uint32_t in1_emod_z;
- /** 0x68 */
-} __attribute__((packed));
-
-struct maxp_fields {
- /** 0x0 */
- uint8_t opcode;
- uint8_t dma_en : 4;
- uint8_t emr : 2;
- uint8_t : 0;
- uint16_t cmd_id;
- /** 0x4 */
- uint32_t pad_t : 3;
- uint32_t pad_b : 3;
- uint32_t pad_l : 3;
- uint32_t pad_r : 3;
- uint32_t flt_w_m1 : 3;
- uint32_t flt_h_m1 : 3;
- uint32_t str_x_m1 : 1;
- uint32_t str_y_m1 : 1;
- uint32_t : 0;
- /** 0x8 */
- uint16_t out_w_m1;
- uint16_t out_h_m1;
- uint32_t out_d_m1 : 16;
- uint32_t : 0;
- /** 0x10 */
- uint32_t out_iaddr0 : 21;
- uint32_t : 0;
- uint32_t out_imod_y : 21;
- uint32_t : 0;
- uint32_t out_imod_z : 21;
- uint32_t : 0;
- uint32_t in0_iaddr0 : 21;
- uint32_t : 0;
- uint32_t in0_imod_y : 21;
- uint32_t : 0;
- uint32_t in0_imod_z : 21;
- uint32_t : 0;
- /** 0x28 */
- uint32_t out_eaddr0;
- uint32_t out_emod_y;
- uint32_t out_emod_z;
- uint32_t in0_eaddr0;
- uint32_t in0_emod_y;
- uint32_t in0_emod_z;
- /** 0x40 */
-} __attribute__((packed));
-
-struct conv_fields {
- /** 0x0 */
- uint8_t relu_en : 1;
- uint8_t : 0;
- uint8_t dma_en : 4;
- uint8_t emr : 2;
- uint8_t : 0;
- uint16_t cmd_id;
- /** 0x4 */
- uint32_t pad_t : 3;
- uint32_t pad_b : 3;
- uint32_t pad_l : 3;
- uint32_t pad_r : 3;
- uint32_t flt_w_m1 : 3;
- uint32_t flt_h_m1 : 3;
- uint32_t str_x_m1 : 1;
- uint32_t str_y_m1 : 1;
- uint32_t wgt_q : 2;
- uint32_t bias_en : 1;
- uint32_t : 0;
- /** 0x8 */
- uint32_t out_zero : 8;
- uint32_t in0_zero : 8;
- uint32_t : 0;
- /** 0xC */
- uint32_t out_shamt : 6;
- uint32_t : 0;
- /** 0x10 */
- uint32_t out_mult;
- uint16_t out_w_m1;
- uint16_t out_h_m1;
- uint16_t out_d_m1;
- uint16_t in0_d_m1;
- /** 0x1C */
- uint32_t out_iaddr0 : 21;
- uint32_t : 0;
- /** 0x20 */
- uint32_t out_imod_y : 21;
- uint32_t : 0;
- /** 0x24 */
- uint32_t out_imod_z : 21;
- uint32_t : 0;
- /** 0x28 */
- uint32_t in0_iaddr0 : 21;
- uint32_t : 0;
- /** 0x2C */
- uint32_t in0_imod_y : 21;
- uint32_t : 0;
- /** 0x30 */
- uint32_t in0_imod_z : 21;
- uint32_t : 0;
- /** 0x34 */
- uint32_t wgt_addr0 : 21;
- uint32_t : 0;
- /** 0x38 */
- uint32_t out_eaddr0;
- uint32_t out_emod_y;
- uint32_t out_emod_z;
- uint32_t in0_eaddr0;
- uint32_t in0_emod_y;
- uint32_t in0_emod_z;
- uint32_t wgt_eaddr0;
- /** 0x54 */
-} __attribute__((packed));
-
-struct relu_fields {
- /** 0x0 */
- uint8_t opcode;
- uint8_t dma_en : 4;
- uint8_t emr : 2;
- uint8_t : 0;
- uint16_t cmd_id;
- /** 0x4 */
- uint32_t out_zero : 8;
- uint32_t in0_zero : 8;
- uint32_t : 0;
- /** 0x8 */
- uint16_t out_w_m1;
- uint16_t out_h_m1;
- /** 0xC */
- uint16_t out_d_m1;
- uint16_t : 0;
- /** 0x10 */
- uint32_t out_iaddr0 : 21;
- uint32_t : 0;
- /** 0x14 */
- uint32_t out_imod_y : 21;
- uint32_t : 0;
- /** 0x18 */
- uint32_t out_imod_z : 21;
- uint32_t : 0;
- /** 0x1C */
- uint32_t in_iaddr0 : 21;
- uint32_t : 0;
- /** 0x20 */
- uint32_t in0_imod_y : 21;
- uint32_t : 0;
- /** 0x24 */
- uint32_t in0_imod_z : 21;
- uint32_t : 0;
- /** 0x28 */
- uint32_t out_eaddr0;
- /** 0x2C */
- uint32_t out_emod_y;
- /** 0x30 */
- uint32_t out_emod_z;
- /** 0x34 */
- uint32_t in0_eaddr0;
- /** 0x38 */
- uint32_t in0_emod_y;
- /** 0x3C */
- uint32_t in0_emod_z;
- /** 0x40 */
-} __attribute__((packed));
-
-struct avgp_fields {
- /** 0x0 */
- uint8_t opcode;
- uint8_t dma_en : 4;
- uint8_t emr : 2;
- uint8_t : 0;
- uint16_t cmd_id;
- /** 0x4 */
- uint32_t pad_t : 3;
- uint32_t pad_b : 3;
- uint32_t pad_l : 3;
- uint32_t pad_r : 3;
- uint32_t flt_w_m1 : 3;
- uint32_t flt_h_m1 : 3;
- uint32_t str_x_m1 : 1;
- uint32_t str_y_m1 : 1;
- uint32_t : 0;
- /** 0x8 */
- uint16_t out_w_m1;
- uint16_t out_h_m1;
- /** 0xC */
- uint16_t out_d_m1;
- uint16_t : 0;
- /** 0x10 */
- uint32_t out_iaddr0 : 21;
- uint32_t : 0;
- /** 0x14 */
- uint32_t out_imod_y : 21;
- uint32_t : 0;
- /** 0x18 */
- uint32_t out_imod_z : 21;
- uint32_t : 0;
- /** 0x1C */
- uint32_t in0_iaddr0 : 21;
- uint32_t : 0;
- /** 0x20 */
- uint32_t in0_imod_y : 21;
- uint32_t : 0;
- /** 0x24 */
- uint32_t in0_imod_z : 21;
- uint32_t : 0;
- /** 0x28 */
- uint32_t out_eaddr0;
- /** 0x2C */
- uint32_t out_emod_y;
- /** 0x30 */
- uint32_t out_emod_z;
- /** 0x34 */
- uint32_t in0_eaddr0;
- /** 0x38 */
- uint32_t in0_emod_y;
- /** 0x3C */
- uint32_t in0_emod_z;
- /** 0x40 */
-} __attribute__((packed));
-
-struct tcnv_fields {
- /** 0x0 */
- uint8_t relu_en : 1;
- uint8_t : 0;
- uint8_t dma_en : 4;
- uint8_t emr : 2;
- uint8_t : 0;
- uint16_t cmd_id;
- /** 0x4 */
- uint32_t pad_t : 3;
- uint32_t pad_b : 3;
- uint32_t pad_l : 3;
- uint32_t pad_r : 3;
- uint32_t flt_w_m1 : 3;
- uint32_t flt_h_m1 : 3;
- uint32_t str_x_m1 : 1;
- uint32_t str_y_m1 : 1;
- uint32_t wgt_q : 2;
- uint32_t bias_en : 1;
- uint32_t : 0;
- /** 0x8 */
- uint32_t out_zero : 8;
- uint32_t in0_zero : 8;
- uint32_t : 0;
- /** 0xC */
- uint32_t out_shamt : 6;
- uint32_t : 0;
- /** 0x10 */
- uint32_t out_mult;
- /** 0x14 */
- uint16_t in0_w_m1;
- uint16_t in0_h_m1;
- /** 0x18 */
- uint16_t out_d_m1;
- uint16_t in0_d_m1;
- /** 0x1C */
- uint32_t out_iaddr0 : 21;
- uint32_t : 0;
- /** 0x20 */
- uint32_t out_imod_y : 21;
- uint32_t : 0;
- /** 0x24 */
- uint32_t out_imod_z : 21;
- uint32_t : 0;
- /** 0x28 */
- uint32_t in0_iaddr0 : 21;
- uint32_t : 0;
- /** 0x2C */
- uint32_t in0_imod_y : 21;
- uint32_t : 0;
- /** 0x30 */
- uint32_t in0_imod_z : 21;
- uint32_t : 0;
- /** 0x34 */
- uint32_t wgt_addr0 : 21;
- uint32_t : 0;
- /** 0x38 */
- uint32_t out_eaddr0;
- /** 0x3C */
- uint32_t out_emod_y;
- /** 0x40 */
- uint32_t out_emod_z;
- /** 0x44 */
- uint32_t in0_eaddr0;
- /** 0x48 */
- uint32_t in0_emod_y;
- /** 0x4C */
- uint32_t in0_emod_z;
- /** 0x50 */
- uint32_t wgt_eaddr0;
- /** 0x54 */
-} __attribute__((packed));
-
-struct conve_fields {
- /** 0x0 */
- uint8_t relu_en : 1;
- uint8_t : 0;
- uint8_t dma_en : 4;
- uint8_t emr : 2;
- uint8_t : 0;
- uint16_t cmd_id;
- /** 0x4 */
- uint32_t pad_t : 3;
- uint32_t pad_b : 3;
- uint32_t pad_l : 3;
- uint32_t pad_r : 3;
- uint32_t flt_w_m1 : 3;
- uint32_t flt_h_m1 : 3;
- uint32_t str_x_m1 : 1;
- uint32_t str_y_m1 : 1;
- uint32_t wgt_q : 2;
- uint32_t bias_en : 1;
- uint32_t : 0;
- /** 0x8 */
- uint32_t out_zero : 8;
- uint32_t in0_zero : 8;
- uint32_t in1_zero : 8;
- uint32_t : 0;
- /** 0xC */
- uint8_t out_shamt : 6;
- uint8_t : 0;
- uint8_t in0_shamt : 6;
- uint8_t : 0;
- uint16_t in1_shamt : 5;
- uint16_t : 0;
- /** 0x10 */
- uint32_t out_mult;
- /** 0x14 */
- uint32_t in0_mult;
- /** 0x18 */
- uint32_t in1_mult;
- /** 0x1C */
- uint16_t out_w_m1;
- uint16_t out_h_m1;
- /** 0x20 */
- uint16_t out_d_m1;
- uint16_t in0_d_m1;
- /** 0x24 */
- uint32_t out_iaddr0 : 21;
- uint32_t : 0;
- /** 0x28 */
- uint32_t out_imod_y : 21;
- uint32_t : 0;
- /** 0x2C */
- uint32_t out_imod_z : 21;
- uint32_t : 0;
- /** 0x30 */
- uint32_t in0_iaddr0 : 21;
- uint32_t : 0;
- /** 0x34 */
- uint32_t in0_imod_y : 21;
- uint32_t : 0;
- /** 0x38 */
- uint32_t in0_imod_z : 21;
- uint32_t : 0;
- /** 0x3C */
- uint32_t in1_iaddr0 : 21;
- uint32_t : 0;
- /** 0x40 */
- uint32_t in1_imod_y : 21;
- uint32_t : 0;
- /** 0x44 */
- uint32_t in1_imod_z : 21;
- uint32_t : 0;
- /** 0x48 */
- uint32_t wgt_addr0 : 21;
- uint32_t : 0;
- /** 0x4C */
- uint32_t out_eaddr0;
- /** 0x50 */
- uint32_t out_emod_y;
- /** 0x54 */
- uint32_t out_emod_z;
- /** 0x58 */
- uint32_t in0_eaddr0;
- /** 0x5C */
- uint32_t in0_emod_y;
- /** 0x60 */
- uint32_t in0_emod_z;
- /** 0x64 */
- uint32_t in1_eaddr0;
- /** 0x68 */
- uint32_t in1_emod_y;
- /** 0x6C */
- uint32_t in1_emod_z;
- /** 0x70 */
- uint32_t wgt_eaddr0;
- /** 0x74 */
-} __attribute__((packed));
-
-struct tcnve_fields {
- /** 0x0 */
- uint8_t relu_en : 1;
- uint8_t : 0;
- uint8_t dma_en : 4;
- uint8_t emr : 2;
- uint8_t : 0;
- uint16_t cmd_id;
- /** 0x4 */
- uint32_t pad_t : 3;
- uint32_t pad_b : 3;
- uint32_t pad_l : 3;
- uint32_t pad_r : 3;
- uint32_t flt_w_m1 : 3;
- uint32_t flt_h_m1 : 3;
- uint32_t str_x_m1 : 1;
- uint32_t str_y_m1 : 1;
- uint32_t wgt_q : 2;
- uint32_t bias_en : 1;
- uint32_t : 0;
- /** 0x8 */
- uint32_t out_zero : 8;
- uint32_t in0_zero : 8;
- uint32_t in1_zero : 8;
- uint32_t : 0;
- /** 0xC */
- uint8_t out_shamt : 6;
- uint8_t : 0;
- uint8_t in0_shamt : 6;
- uint8_t : 0;
- uint16_t in1_shamt : 5;
- uint8_t : 0;
- /** 0x10 */
- uint32_t out_mult;
- /** 0x14 */
- uint32_t in0_mult;
- /** 0x18 */
- uint32_t in1_mult;
- /** 0x1C */
- uint16_t in0_w_m1;
- uint16_t in0_h_m1;
- /** 0x20 */
- uint16_t out_d_m1;
- uint16_t in0_d_m1;
- /** 0x24 */
- uint32_t out_iaddr0 : 21;
- uint32_t : 0;
- /** 0x28 */
- uint32_t out_imod_y : 21;
- uint32_t : 0;
- /** 0x2C */
- uint32_t out_imod_z : 21;
- uint32_t : 0;
- /** 0x30 */
- uint32_t in0_iaddr0 : 21;
- uint32_t : 0;
- /** 0x34 */
- uint32_t in0_imod_y : 21;
- uint32_t : 0;
- /** 0x38 */
- uint32_t in0_imod_z : 21;
- uint32_t : 0;
- /** 0x3C */
- uint32_t in1_iaddr0 : 21;
- uint32_t : 0;
- /** 0x40 */
- uint32_t in1_imod_y : 21;
- uint32_t : 0;
- /** 0x44 */
- uint32_t in1_imod_z : 21;
- uint32_t : 0;
- /** 0x48 */
- uint32_t wgt_addr0 : 21;
- uint32_t : 0;
- /** 0x4C */
- uint32_t out_eaddr0;
- /** 0x50 */
- uint32_t out_emod_y;
- /** 0x54 */
- uint32_t out_emod_z;
- /** 0x58 */
- uint32_t in0_eaddr0;
- /** 0x5C */
- uint32_t in0_emod_y;
- /** 0x60 */
- uint32_t in0_emod_z;
- /** 0x64 */
- uint32_t in1_eaddr0;
- /** 0x68 */
- uint32_t in1_emod_y;
- /** 0x6C */
- uint32_t in1_emod_z;
- /** 0x70 */
- uint32_t wgt_eaddr0;
- /** 0x74 */
-} __attribute__((packed));
-
-#ifdef __cplusplus
-}
-
-#include <npubinfmt.h>
-#include <NPUdrvAPI.h>
-/** @brief second-party libraries */
-#include <TrinityCore.h>
-#include <DataGen.h>
-
-using namespace trinity_vision;
-
-/** @brief NPU Emulation using TrinityCore */
-class NPUCoreEmul : public TrinityCore<64>, public DataGen {
- public:
- /** @brief constructor */
- NPUCoreEmul() {};
- /** @brief run inference using emulation */
- int run (char *model, char *buffer);
-
- protected:
- /** @brief set metadata from the dmabuf */
- void set_meta (int dmabuf_id);
- /** @brief get the current opcode */
- visa_opcode get_opcode ();
- /** @brief dump the output data to file and read again to memeory */
- void dump_fmap_out (uint32_t out_addr);
- /** @brief allocate data for emulation */
- bool alloc_trinity_data (TRINITY_CORE_PARA_OP& op,
- int32_t in0_eaddr, int32_t in1_eaddr, int32_t wgt_eaddr);
-
- /** @brief run ESUM (elementwise sum) op */
- void run_esum_op (TRINITY_CORE_PARA_OP& op);
- /** @brief run MAXP (max pooling) op */
- void run_maxp_op (TRINITY_CORE_PARA_OP& op);
- /** @brief run CONV (normal convolution) op */
- void run_conv_op (TRINITY_CORE_PARA_OP& op);
- /** @brief run ReLU op */
- void run_relu_op (TRINITY_CORE_PARA_OP& op);
- /** @brief run AVGP (average pooling) op */
- void run_avgp_op (TRINITY_CORE_PARA_OP& op);
- /** @brief run CONVE (convolution and elementwise sum) op */
- void run_conve_op (TRINITY_CORE_PARA_OP& op);
-
- /** @brief run TCONV (transposed convolution) op, WIP */
- void run_tcnv_op (TRINITY_CORE_PARA_OP& op);
- /** @brief run TCONVE (transposed convolution and elementwise sum) op, WIP */
- void run_tcnve_op (TRINITY_CORE_PARA_OP& op);
-
- /** @brief inference main loop */
- int run_inference ();
-
- private:
- npubin_meta meta;
-
- char *PC;
- char *model_base;
- char *program_base;
- char *weight_base;
- char *buffer_base;
-
- TR_FMAP trinity_fmap_in;
- TR_FMAP trinity_esum_in;
- TR_FMAP trinity_fmap_out;
- TrinityWgt trinity_weight;
-};
-#endif
-
-#endif /** __NPU_CORE_NPUemul_H__ */
#include <GEMdrvAPI.h>
#include <NPUdrvAPI.h>
+#include <NPUemul.h>
+#include <npubinfmt.h>
+
#include "NPUioctl.h"
-#include "NPUemul.h"
#include <errno.h>
#include <unistd.h>
/** @brief wrapper for the inference run */
int run (void *model, void *input) {
- return emul.run (static_cast<char*>(model), static_cast<char*>(input));
+ return run_npu_emul (static_cast<char*>(model), static_cast<char*>(input));
}
private:
std::map<int, npu_model_config *> model_map;
- NPUCoreEmul emul;
int cyclic_model_id;
};
-libnpuvision_dep = dependency('libnpuvision')
-
-ne_core_npu_emul_inc = include_directories('.')
+ne_emul_dep = dependency('npu-engine-emul')
ne_core_npu_emul_dep = declare_dependency(
- sources : ['NPUemul.cpp'],
- dependencies : [libnpuvision_dep],
- include_directories : [ne_common_inc, ne_core_npu_inc, ne_core_npu_emul_inc])
-
-if get_option('enable_npu_emul')
-ne_core_npu_emul_ioctl_dep = declare_dependency(
sources : ['NPUioctl.cpp'],
- dependencies : [ne_core_npu_emul_dep, ne_core_gem_dep])
-endif
+ dependencies : [ne_emul_dep, ne_core_gem_dep])
ne_core_npu_inc = include_directories('.')
-subdir('emul')
-
if get_option('enable_npu_emul')
-ne_core_npu_dep = declare_dependency(
- sources : ['NPUdrvAPI.c'],
- dependencies : [ne_core_utils_dep, ne_core_npu_emul_ioctl_dep],
- include_directories : [ne_common_inc, ne_core_npu_inc])
+ subdir('emul')
+
+ ne_core_npu_dep = declare_dependency(
+ sources : ['NPUdrvAPI.c'],
+ dependencies : [ne_core_utils_dep, ne_core_npu_emul_dep],
+ include_directories : [ne_common_inc, ne_core_npu_inc])
else
-ne_core_npu_dep = declare_dependency(
- sources : ['NPUdrvAPI.c'],
- dependencies : [ne_core_utils_dep],
- include_directories : [ne_common_inc, ne_core_npu_inc])
+ ne_core_npu_dep = declare_dependency(
+ sources : ['NPUdrvAPI.c'],
+ dependencies : [ne_core_utils_dep],
+ include_directories : [ne_common_inc, ne_core_npu_inc])
endif
+++ /dev/null
-#!/usr/bin/env python
-
-##
-# @file gen_npu_model.py
-# @brief generate NPU model binary with VISA prog binary and weight data
-# @author Dongju Chae <dongju.chae@samsung.com>
-#
-# Usage:
-# $ python gen_npu_model.py [testdata directory]
-##
-
-import argparse
-import os
-import sys
-import struct
-import math
-import re
-
-## these values are common for all existing example visa binaries
-META_SIZE=4096
-SIZE_ALIGN=4096
-SIGNATURE='SRNPU'
-NPU_MODEL_NAME='model.tvn'
-
-MAX_TENSORS=16
-MAX_RANK=4
-ELEM_SIZE=1
-
-## @brief class for NPU tensor
-class Tensor:
- def __init__ (self, asm, dma_en):
- opcode = asm["opcode"]
- self.asm = asm
- self.dma_en = dma_en
- self.dims = []
- self.elem_size = ELEM_SIZE # do not consider other types yet
-
- ## @todo change the order of dimensions later (need some discussion)
- ## Current assumption: NHWC
- self.dims.append(1) # batch size; set 1 for now
- if dma_en == "out": # output tensor
- self.dims.append(asm["out_h_m1"] + 1) # height
- self.dims.append(asm["out_w_m1"] + 1) # width
- self.dims.append(asm["out_d_m1"] + 1) # depth
- elif dma_en == "in0": # 1st input tensor
- if opcode == "MAXP" or opcode == "AVGP" or opcode == "CONV" or opcode == "CONVE":
- in0_h_m1 = asm["out_h_m1"] * (asm["str_y_m1"] + 1) + asm["flt_h_m1"]
- in0_h_m1 = in0_h_m1 - asm["pad_t"] - asm["pad_b"]
- self.dims.append(in0_h_m1 + 1)
- in0_w_m1 = asm["out_w_m1"] * (asm["str_x_m1"] + 1) + asm["flt_w_m1"]
- in0_w_m1 = in0_w_m1 - asm["pad_l"] - asm["pad_r"]
- self.dims.append(in0_w_m1 + 1)
- self.dims.append(asm["out_d_m1"] + 1)
- elif opcode == "ESUM" or opcode == "RELU":
- self.dims.append(asm["out_h_m1"] + 1)
- self.dims.append(asm["out_w_m1"] + 1)
- self.dims.append(asm["out_d_m1"] + 1)
- else:
- raise Exception ("Unsupported opcode: " + opcode)
- elif dma_en == "in1": # 2nd input tensor
- if opcode == "ESUM" or opcode == "CONVE":
- self.dims.append(asm["out_h_m1"] + 1)
- self.dims.append(asm["out_w_m1"] + 1)
- self.dims.append(asm["out_d_m1"] + 1)
- else:
- raise Exception ("Unsupported opcode: " + opcode)
- else:
- raise Exception ("Unknown dma_en: " + dma_en)
-
- self.eaddr = asm[dma_en + "_eaddr0"]
- self.emod_y = asm[dma_en + "_emod_y"]
- self.emod_z = asm[dma_en + "_emod_z"]
-
- def size (self):
- size = self.elem_size
- for dim in self.dims:
- size *= dim
- return size
-
-## @brief class for NPU weight
-class Weight:
- def __init__ (self, asm, data):
- self.eaddr = asm["wgt_eaddr0"]
- self.qbit = asm["wgt_qbit_m1"] + 1 # is it necessary?
- self.data = data
- self.size = len(data)
-
-## @brief class for Virtual ISA (VISA) assembly
-class VisaAsm:
- def __init__ (self, asm):
- entries = asm.split()
-
- self.dic = {}
- self.dic["opcode"] = entries[0]
-
- for entry in entries[1:]:
- (key, value) = entry.split('=')
- self.dic[key] = value
-
- def __getitem__ (self, key):
- try:
- if key == "opcode":
- return self.dic[key]
- if "addr" in key:
- return int(self.dic[key], 16)
- return int(self.dic[key])
- except:
- return 0
-
-## @brief class for NPU model used in NPU Engine
-class NPUModel:
- ## @brief init function
- def __init__ (self, args):
- self.dir = args.dir[0]
- self.version = args.version[0]
- if self.version != 1 and self.version != 2:
- raise Exception('Unsupported npubinfmt version')
- if not args.outdir is None:
- self.outdir = args.outdir
- else:
- self.outdir = self.dir
- self.single_mode = args.single_mode
- self.debug_mode = args.debug_mode
-
- self.path_prog_asm = self.dir + "/program.asm"
- self.path_prog_binary = self.dir + "/program.bin"
-
- self.path_model = self.outdir + "/" + NPU_MODEL_NAME
-
- self.size_prog_binary = os.path.getsize(self.path_prog_binary)
- self.size_all_weights = 0
-
- self.signature = SIGNATURE
-
- self.input_tensors = []
- self.input_weights = []
- self.output_tensors = []
-
- self.file_model = open(self.path_model, "wb")
- self.file_prog = open(self.path_prog_asm, "r")
-
- ## @brief return the size of model including metadata
- def get_model_size (self):
- return META_SIZE + self.size_prog_binary + self.size_all_weights
-
- ## @brief return max buffer size to cover input/intermediate/output tensors
- def get_buffer_size (self):
- max_size = 0
-
- for tensor in self.input_tensors:
- if max_size < tensor.eaddr + tensor.size():
- max_size = tensor.eaddr + tensor.size()
-
- for tensor in self.output_tensors:
- if max_size < tensor.eaddr + tensor.size():
- max_size = tensor.eaddr + tensor.size()
-
- return max_size
-
- ## @brief get magic code with npubinfmt version
- def get_magic_code (self):
- value = 0
- for s in self.signature:
- value = (value << 8) + ord(s)
- value <<= (8 - len(self.signature)) * 8
- value += self.version
-
- return value
-
- ## @brief get binary data; return empty string if it does not exist
- def get_bin_data (self, name, idx):
- if self.single_mode:
- path = self.dir + "/" + name + ".bin"
- else:
- path = self.dir + "/" + name + "_" + str(idx) + ".bin"
- return open(path).read() if os.path.isfile(path) else ""
-
- ## @brief generate model binary file with provided data
- def gen (self):
- self.parse_assembly()
- self.fill_meta()
- self.fill_program()
- self.fill_weight()
- self.finalize()
-
- ## @brief parse program assembly to retreive necessary info.
- def parse_assembly (self):
- in0_added = False
- for idx, asm_line in enumerate(self.file_prog.readlines()):
- # Skip NOP and SAW instructions
- opcode = asm_line.split()[0]
- if opcode == "NOP":
- continue
- if opcode == "SAW":
- break
-
- # Parse Visa Assembly instruction
- asm = VisaAsm(asm_line)
-
- # Note that this model generator supports only HWA's layer operations, not a generic model.
- # So, each layer has 2 input tensors and 1 output tensor at the most.
- if asm["dma_in0_en"] == 1 and not in0_added:
- tensor = Tensor(asm, "in0")
- self.input_tensors.append(tensor)
- in0_added = True
- if asm["dma_in1_en"] == 1:
- tensor = Tensor(asm, "in1")
- self.input_tensors.append(tensor)
- if asm["dma_out_en"] == 1:
- tensor = Tensor(asm, "out")
- self.output_tensors.append(tensor)
- if asm["dma_wgt_en"] == 1:
- data = self.get_bin_data ("input_weight", idx)
- if data:
- weight = Weight(asm, data)
- self.size_all_weights += weight.size
- self.input_weights.append(weight)
-
- if self.single_mode:
- break
-
- ## @brief fill the metadata for npu model
- def fill_meta (self):
- self.file_model.seek(0)
- self.file_model.write(struct.pack('<Q', self.get_magic_code())) # magiccode
- self.file_model.write(struct.pack('<Q', 0)) # npu_version
- self.file_model.write(struct.pack('<Q', 0)) # compiler_version
- self.file_model.write("npu_engine_example_model") # name
- self.file_model.seek(8*3 + 128)
-
- self.file_model.write(struct.pack('<Q', 1)) # model id
- self.file_model.write(struct.pack('<Q', 1)) # model version
-
- self.file_model.write(struct.pack('<Q', self.get_buffer_size())) # buffer size
- self.file_model.write(struct.pack('<Q', self.get_model_size())) # size including meta
- self.file_model.write(struct.pack('<Q', 0)) # model type, SMODEL_OPS_NPU
-
- self.fill_v1_meta()
-
- self.file_model.write(struct.pack('<Q', self.size_prog_binary)) # program size
- self.file_model.write(struct.pack('<Q', self.size_all_weights)) # weight size
-
- self.fill_v2_meta()
-
- def fill_v1_meta (self):
- # assume all tensors are contiguous
- if self.version == 1:
- # input offset (first layer)
- self.file_model.write(struct.pack('<Q', self.input_tensors[0].eaddr))
- # input size (first layer) including esum (all layers)
- input_tensors_size = 0
- for tensor in self.input_tensors:
- input_tensors_size += tensor.size()
- self.file_model.write(struct.pack('<Q', input_tensors_size))
- # output offset (last lasyer)
- self.file_model.write(struct.pack('<Q', self.output_tensors[-1].eaddr))
- # output size (last layer)
- self.file_model.write(struct.pack('<Q', self.output_tensors[-1].size()))
- else:
- # fill dummy data for backward campatibility
- self.file_model.write(struct.pack('<Q', 0))
- self.file_model.write(struct.pack('<Q', 0))
- self.file_model.write(struct.pack('<Q', 0))
- self.file_model.write(struct.pack('<Q', 0))
-
- def fill_v2_meta (self):
- # each tensor has its offset value because they may be discontiguous
- if self.version == 2:
- # input tensors
- self.file_model.write(struct.pack('<I', len(self.input_tensors)))
- # input_offsets[MAX_TENSORS]
- for idx in range(MAX_TENSORS):
- if idx < len(self.input_tensors):
- self.file_model.write(struct.pack('<I', self.input_tensors[idx].eaddr))
- else:
- self.file_model.write(struct.pack('<I', 0))
- # input_elem_size[MAX_TENSORS]
- for idx in range(MAX_TENSORS):
- if idx < len(self.input_tensors):
- self.file_model.write(struct.pack('<I', self.input_tensors[idx].elem_size))
- else:
- self.file_model.write(struct.pack('<I', 0))
- # input_dims[MAX_TENSORS][MAX_RANK]
- for idx in range(MAX_TENSORS):
- for rank in range(MAX_RANK):
- if idx < len(self.input_tensors):
- self.file_model.write(struct.pack('<I', self.input_tensors[idx].dims[rank]))
- else:
- self.file_model.write(struct.pack('<I', 0))
- # input_emod_y[MAX_TENSORS]
- for idx in range(MAX_TENSORS):
- if idx < len(self.input_tensors):
- self.file_model.write(struct.pack('<I', self.input_tensors[idx].emod_y))
- else:
- self.file_model.write(struct.pack('<I', 0))
- # input_emod_z[MAX_TENSORS]
- for idx in range(MAX_TENSORS):
- if idx < len(self.input_tensors):
- self.file_model.write(struct.pack('<I', self.input_tensors[idx].emod_z))
- else:
- self.file_model.write(struct.pack('<I', 0))
- ## quantization information; @todo how to store exact values?
- # input_quant_z[MAX_TENSORS]
- for idx in range(MAX_TENSORS):
- self.file_model.write(struct.pack('<I', 0));
- # input_quant_s[MAX_TENSORS]
- for idx in range(MAX_TENSORS):
- self.file_model.write(struct.pack('<f', 1.0));
-
- # output tensors (in this model generator, only one output tensor exists)
- self.file_model.write(struct.pack('<I', 1))
- # output_offsets[MAX_TENSORS]
- self.file_model.write(struct.pack('<I', self.output_tensors[-1].eaddr))
- for idx in range(1, MAX_TENSORS):
- self.file_model.write(struct.pack('<I', 0))
- # output_elem_size[MAX_TENSORS]
- self.file_model.write(struct.pack('<I', self.output_tensors[-1].elem_size))
- for idx in range(1, MAX_TENSORS):
- self.file_model.write(struct.pack('<I', 0))
- # output_dims[MAX_TENSORS][MAX_RANK]
- for rank in range(MAX_RANK):
- self.file_model.write(struct.pack('<I', self.output_tensors[-1].dims[rank]))
- for idx in range(1, MAX_TENSORS):
- for rank in range(MAX_RANK):
- self.file_model.write(struct.pack('<I', 0))
- # output_emod_y[MAX_TENSORS]
- self.file_model.write(struct.pack('<I', self.output_tensors[-1].emod_y))
- for idx in range(1, MAX_TENSORS):
- self.file_model.write(struct.pack('<I', 0))
- # output_emod_z[MAX_TENSORS]
- self.file_model.write(struct.pack('<I', self.output_tensors[-1].emod_z))
- for idx in range(1, MAX_TENSORS):
- self.file_model.write(struct.pack('<I', 0))
- ## quantization information; @todo how to store exact values?
- # output_quant_z[MAX_TENSORS]
- for idx in range(MAX_TENSORS):
- self.file_model.write(struct.pack('<I', 0));
- # output_quant_s[MAX_TENSORS]
- for idx in range(MAX_TENSORS):
- self.file_model.write(struct.pack('<f', 1.0));
-
- ## @brief fill the content of program binary
- def fill_program (self):
- self.file_model.seek(META_SIZE)
- with open(self.path_prog_binary, "rb") as f:
- self.file_model.write(f.read())
-
- ## @brief fill the content of input weight
- def fill_weight (self):
- self.file_model.seek(META_SIZE + self.size_prog_binary)
- for weight in self.input_weights:
- self.file_model.write(weight.data)
-
- ## @brief close all files
- def finalize (self):
- self.file_model.close()
- self.file_prog.close()
-
- if self.debug_mode:
- print ("Model metadata:\t%8d" % META_SIZE)
- print ("Prog binary:\t%8d" % self.size_prog_binary)
- print ("All weight:\t%8d" % self.size_all_weights)
- print ("Final model:\t%8d" % self.get_model_size())
-
-## @brief the main routine
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='Generate model from test data')
- parser.add_argument('dir', metavar='DIR', type=str, nargs=1,
- help='A path of directory where the test data are located')
- parser.add_argument('version', metavar='VER', type=int, nargs=1,
- help='A version of NPU model binary (only support 1 or 2)')
- parser.add_argument('--single', '-s', action='store_true', dest='single_mode', default=False,
- help='Turn on the single layer mode (optional)', required=False)
- parser.add_argument('--debug', '-d', action='store_true', dest='debug_mode', default=False,
- help='Turn on the debug mode (optional)', required=False)
- parser.add_argument('--outdir', '-o', dest='outdir', type=str,
- help='A path of directory where the generated model would be placed (optional)', required=False)
- args = parser.parse_args()
-
- try:
- model = NPUModel (args)
- model.gen()
- except Exception as e:
- print ("[FAILURE] error detected: " + str(e))
+++ /dev/null
-/**
- * Proprietary
- * Copyright (C) 2019 Samsung Electronics
- * Copyright (C) 2019 Dongju Chae <dongju.chae@samsung.com>
- */
-/**
- * @file gen_ref_data.cpp
- * @date 13 Feb 2020
- * @brief generate the reference input/output/weight data for NPU models
- * @author Dongju Chae <dongju.chae@samsung.com>
- * @note This requires the libnpuvision package to be compiled.
- */
-
-#include <iostream>
-#include <fstream>
-#include <unistd.h>
-#include <string.h>
-
-#include <NPUemul.h>
-
-using namespace std;
-
-/** @brief a wrapper for TrinityCore/DataGen */
-class RefDataGen: public NPUCoreEmul {
- public:
- /** @brief constructor */
- RefDataGen(const char *_output_dir): output_dir(_output_dir) {
- model = buffer = NULL;
- }
- /** @brief destructor */
- ~RefDataGen();
- /** @brief load and verify the model binary */
- int loadModel(const string target_path);
- /** @brief emit function to generate reference data for the model */
- int emit();
-
- protected:
- /** @brief dump input or output data */
- void dumpData(const string name, const char *data, uint32_t size);
- /** @brief generate random input data */
- void genRandomInput();
- /** @brief generate golden output data */
- void genGoldenOutput();
-
- private:
- const string output_dir;
- char *model;
- char *buffer;
- npubin_meta meta;
-};
-
-/** @brief destructor */
-RefDataGen::~RefDataGen()
-{
- if (model)
- delete model;
- if (buffer)
- delete buffer;
-}
-
-/** @brief load and verify the model binary */
-int RefDataGen::loadModel(const string target_path)
-{
- ifstream ifs(target_path, ios::binary);
- size_t model_size;
-
- if (!ifs.is_open()) {
- cerr << "Cannot find the target model " << target_path << endl;
- return -1;
- }
-
- ifs.seekg (0, ios::end);
- model_size = ifs.tellg();
- if (model_size <= 0) {
- cerr << "Invalid model binary" << endl;
- goto exit_error;
- }
- ifs.seekg (0, ios::beg);
-
- this->model = new char[model_size];
- ifs.read(this->model, model_size);
-
- memcpy(&this->meta, this->model, NPUBIN_META_SIZE);
- if (this->meta.size != model_size) {
- cerr << "The model size is not matched" << endl;
- goto exit_error;
- }
-
- if (this->meta.buffer_size == 0) {
- cerr << "The buffer size should be larger than 0" << endl;
- goto exit_error;
- }
-
- if (!CHECK_NPUBIN(this->meta.magiccode)) {
- cerr << "This model binary is not compatible to our design" << endl;
- cerr << "Please refer to ./include/common/npubinfmt.h" << endl;
- goto exit_error;
- }
-
- if (NPUBIN_VERSION(this->meta.magiccode) > NPUBIN_VERSION_MAX) {
- cerr << "Unsupported npubinfmt version" << endl;
- goto exit_error;
- }
-
- this->buffer = new char[this->meta.buffer_size];
-
- ifs.close();
- return 0;
-
-exit_error:
- if (this->model)
- delete this->model;
- ifs.close();
- return -1;
-}
-
-/** @brief dump input or output data */
-void RefDataGen::dumpData(const string name, const char *data, uint32_t size)
-{
- ofstream ofs(output_dir + '/' + name, ios::binary);
-
- if (!ofs.is_open()) {
- cerr << "Cannot create input data" << endl;
- return;
- }
-
- ofs.write(data, size);
- ofs.close();
-}
-
-/** @brief generate random input data */
-void RefDataGen::genRandomInput()
-{
- srand (time(NULL));
-
- switch (NPUBIN_VERSION(this->meta.magiccode)) {
- case 0: /* regarded as version 1 */
- case 1:
- for (uint64_t i = 0; i < this->meta.input_size; i++)
- this->buffer[this->meta.input_offset + i] = (uint8_t) rand();
- dumpData("input_fmap.bin",
- this->buffer + this->meta.input_offset, this->meta.input_size);
- break;
- case 2:
- for (uint32_t i = 0; i < this->meta.input_num; i++) {
- /* it may not be the exact size; but enough for testing */
- uint32_t input_size = this->meta.input_elem_size[i];
-
- for (uint32_t j = 0; j < MAX_RANK; j++)
- input_size *= this->meta.input_dims[i][j];
-
- for (uint32_t j = 0; j < input_size; j++)
- this->buffer[this->meta.input_offsets[i] + j] = (uint8_t) rand();
-
- dumpData("input_fmap_" + to_string(i) + ".bin",
- this->buffer + this->meta.input_offsets[i], input_size);
- }
- break;
- default:
- abort (); /* already checked */
- }
-}
-
-/** @brief generate golden output data */
-void RefDataGen::genGoldenOutput()
-{
- switch (NPUBIN_VERSION(this->meta.magiccode)) {
- case 0: /* regarded as version 1 */
- case 1:
- dumpData("output_fmap.bin",
- this->buffer + this->meta.output_offset, this->meta.output_size);
- break;
- case 2:
- for (uint32_t i = 0; i < this->meta.output_num; i++) {
- /* it may not be the exact size; but enough for testing */
- uint32_t output_size = this->meta.output_elem_size[i];
-
- for (uint32_t j = 0; j < MAX_RANK; j++)
- output_size *= this->meta.output_dims[i][j];
-
- dumpData("output_fmap_" + to_string(i) + ".bin",
- this->buffer + this->meta.output_offsets[i], output_size);
- }
- break;
- default:
- abort (); /* already checked */
- }
-}
-
-/** @brief emit function to generate reference data for the model */
-int RefDataGen::emit()
-{
- genRandomInput();
- int ret;
-
- ret = run(this->model, this->buffer);
- if (ret == 0)
- genGoldenOutput();
-
- return ret;
-}
-
-static void print_usage(const char *name)
-{
- cerr << "Usage: " << name << " [options] binary" << endl;
- cerr << "Options: " << endl;
- cerr << " -o <arg> \t Specify output directory path" << endl;
-}
-
-static char default_output_dir[] = ".";
-
-/** @brief main function */
-int main(int argc, char **argv)
-{
- char *target_path = NULL;
- char *output_dir = NULL;
- int i, c;
-
- /* parse option arguments */
- opterr = 0;
- while ((c = getopt (argc, argv, "o:")) != -1) {
- switch (c)
- {
- case 'o':
- output_dir = optarg;
- break;
- case '?':
- if (optopt == 'o')
- cerr << "Option -o requires an argument" << endl;
- else
- cerr << "Unknown option" << endl;
- cerr << endl;
-
- print_usage(argv[0]);
- return -1;
- default:
- abort(); /* impossible */
- }
- }
-
- if (output_dir == NULL)
- output_dir = default_output_dir;
-
- /* parse non-option arguments */
- for (i = optind; i < argc; i++)
- target_path = argv[i];
-
- if (target_path == NULL) {
- print_usage(argv[0]);
- return -1;
- }
-
- /* generate reference data */
- RefDataGen gen(output_dir);
-
- if (gen.loadModel(target_path) != 0)
- return -1;
-
- return gen.emit();
-}
+++ /dev/null
-/**
- * Proprietary
- * Copyright (C) 2019 Samsung Electronics
- * Copyright (C) 2019 Dongju Chae <dongju.chae@samsung.com>
- */
-/**
- * @file gen_visa_prog.cpp
- * @date 07 Nov 2019
- * @brief generate VISA program binary for multi-layer NPU models
- * @author Dongju Chae <dongju.chae@samsung.com>
- * @note This requires the libnpuvision package to be compiled.
- */
-
-#include <iostream>
-#include <fstream>
-#include <cstdlib>
-#include <ctime>
-#include <vector>
-
-#include <TrinityCore.h>
-
-#define INPUT_ADDR_START 0x1000
-#define OUTPUT_ADDR_START 0x100000
-/** the output address of last layer should be this value */
-#define OUTPUT_ADDR_FINAL 0x200000
-
-/** revise this later */
-typedef enum {
- TRINITY_OPCODE_CONV = 0,
- TRINITY_OPCODE_MAXP = 4,
- TRINITY_OPCODE_AVGP = 5,
- TRINITY_OPCODE_RELU = 6,
- TRINITY_OPCODE_ESUM = 7
-} trinity_opcode;
-
-using namespace std;
-using namespace trinity_vision;
-
-static int avgp_rshamts[17] = {0,0,0,1,1,0,2,0,2,3,0,0,3,0,0,0,3};
-static int avgp_mults[17] = {0,0,1073741824,1431655765,1073741824,0,1431655765,0,1073741824,1908874354,0,0,1431655765,0,0,0,1073741824};
-
-/** @brief a wrapper for TrinityCore/DataGen */
-class ProgGen: public TrinityCore<64> {
- public:
- /** @brief constructor */
- ProgGen(const string _dir): dir(_dir) {
- addr_in = addr_wgt = 0;
- addr_out = OUTPUT_ADDR_START;
- addr_out_prev = INPUT_ADDR_START + get_random_offset(); /* any value is fine */
- }
- /** @brief emit function to generate the testdata for multi-layer models */
- int emit();
- /** @brief append an trinity operation to the operation list */
- void append(TRINITY_CORE_PARA_OP& op) { ops.push_back(op); }
- /** @brief convert the operation to VISA assembly codes */
- void write_asm_file(size_t idx, TRINITY_FMAP_PARA& in,
- TRINITY_FMAP_PARA& out, WGT_PARA& weight);
- /** @brief get random offset aligned to 0x1000 */
- uint32_t get_random_offset();
-
- private:
- const string dir;
- uint32_t addr_in, addr_wgt;
- uint32_t addr_out, addr_out_prev;
- vector<TRINITY_CORE_PARA_OP> ops;
- ofstream ofs_asm;
-};
-
-/** @brief get random offset aligned to 0x1000 */
-uint32_t ProgGen::get_random_offset()
-{
- return (std::rand() % 9 + 1) * 0x1000;
-}
-
-/** @brief convert the operation to VISA assembly codes */
-void ProgGen::write_asm_file(size_t idx, TRINITY_FMAP_PARA& in,
- TRINITY_FMAP_PARA& out, WGT_PARA& weight)
-{
- TRINITY_CORE_PARA_OP& op = ops[idx];
- CORE_PARA cp = get_core_para(op);
- bool first = (idx == 0);
- bool last = (idx == ops.size() - 1);
- int dma_in1_en = 0;
- int dma_wgt_en = 0;
-
- switch (op.info.OPCODE) {
- case TRINITY_OPCODE_CONV:
- dma_wgt_en = 1;
- if (op.info.CNV_ESUM_EN) {
- ofs_asm << "CONVE";
- dma_in1_en = 1;
- } else {
- ofs_asm << "CONV";
- }
- break;
- case TRINITY_OPCODE_MAXP:
- ofs_asm << "MAXP";
- break;
- case TRINITY_OPCODE_AVGP:
- ofs_asm << "AVGP";
- break;
- case TRINITY_OPCODE_RELU:
- ofs_asm << "RELU";
- break;
- case TRINITY_OPCODE_ESUM:
- dma_in1_en = 1;
- ofs_asm << "ESUM";
- break;
- default: /** other opcodes are not supported yet */
- return;
- }
-
- ofs_asm << dec;
- /** dma enable */
- ofs_asm << " dma_in0_en=1 dma_in1_en=" << dma_in1_en
- << " dma_out_en=1 dma_wgt_en=" << dma_wgt_en;
- /** bias/relu enable */
- ofs_asm << " bias_en=" << cp.BIAS_EN << " relu_en=" << cp.RELU_EN;
- /** padding */
- ofs_asm << " pad_t=" << cp.PAD_T << " pad_b=" << cp.PAD_B;
- ofs_asm << " pad_l=" << cp.PAD_L << " pad_r=" << cp.PAD_R;
- /** filter/stride minus 1 */
- ofs_asm << " flt_w_m1=" << cp.FLT_W_M1 << " flt_h_m1=" << cp.FLT_H_M1;
- ofs_asm << " str_x_m1=" << cp.STR_X_M1 << " str_y_m1=" << cp.STR_Y_M1;
- /** weight qbit minus 1 */
- ofs_asm << " wgt_qbit_m1=" << cp.WGT_QBIT_M1;
- /** in/out parameters */
- ofs_asm << " in0_zero=" << cp.IN0_ZERO
- << " in0_shamt=" << (op.info.OPCODE == TRINITY_OPCODE_ESUM ?
- cp.IN0_RSHAMT : cp.IN0_LSHAMT)
- << " in0_mult=" << cp.IN0_MULT;
- ofs_asm << " in1_zero=" << cp.IN1_ZERO
- << " in1_shamt=" << cp.IN1_RSHAMT
- << " in1_mult=" << cp.IN1_MULT;
- ofs_asm << " out_zero=" << cp.OUT_ZERO
- << " out_shamt=" << cp.OUT_LSHAMT
- << " out_mult=" << cp.OUT_MULT;
- /** in/out data size */
- ofs_asm << " out_w_m1=" << cp.OUT_W_M1 << " out_h_m1=" << cp.OUT_H_M1
- << " out_d_m1=" << cp.OUT_D_M1 << " in0_d_m1=" << cp.IN0_D_M1;
- /** etc */
- ofs_asm << " emro=0 emri=0";
-
- /** mod calculation */
- ofs_asm << " in0_imod_y=" << 64 * in.width << " in0_imod_z=" << (64 * in.width) * in.height;
- ofs_asm << " in0_emod_y=" << 64 * in.width << " in0_emod_z=" << (64 * in.width) * in.height;
- if (op.info.OPCODE == TRINITY_OPCODE_ESUM) {
- ofs_asm << " in1_imod_y=" << 64 * in.width << " in1_imod_z=" << (64 * in.width) * in.height;
- ofs_asm << " in1_emod_y=" << 64 * in.width << " in1_emod_z=" << (64 * in.width) * in.height;
- } else if (op.info.CNV_ESUM_EN) {
- ofs_asm << " in1_imod_y=" << 64 * out.width << " in1_imod_z=" << (64 * out.width) * out.height;
- ofs_asm << " in1_emod_y=" << 64 * out.width << " in1_emod_z=" << (64 * out.width) * out.height;
- }
-
- ofs_asm << " out_imod_y=" << 64 * out.width << " out_imod_z=" << (64 * out.width) * out.height;
- ofs_asm << " out_emod_y=" << 64 * out.width << " out_emod_z=" << (64 * out.width) * out.height;
-
- /** address calculation */
- uint32_t size_input = in.width * in.height * in.depth;
- uint32_t size_output = out.width * out.height * out.depth;
- uint32_t size_weight = weight.CACL_SIZE_BYTE;
-
- if (first)
- addr_in += addr_out_prev + size_input + get_random_offset();
-
- ofs_asm << hex;
- /** the input address is the output address of the previous layer */
- ofs_asm << " in0_iaddr0=0x0 in0_eaddr0=0x" << addr_out_prev;
- if (op.info.OPCODE == TRINITY_OPCODE_ESUM || op.info.CNV_ESUM_EN) {
- ofs_asm << " in1_iaddr0=0x40000 in1_eaddr0=0x" << addr_in;
- addr_in += size_input + get_random_offset();
- }
- if (op.info.OPCODE < TRINITY_OPCODE_MAXP)
- ofs_asm << " wgt_iaddr0=0x80000 wgt_eaddr0=0x" << addr_wgt;
- ofs_asm << " out_iaddr0=0xC0000 out_eaddr0=0x" << (last ? OUTPUT_ADDR_FINAL : addr_out);
-
- addr_out_prev = addr_out;
- addr_wgt += size_weight;
- addr_out += size_output + get_random_offset();
-
- TR_ASSERT (addr_out < OUTPUT_ADDR_FINAL);
-
- ofs_asm << endl;
-}
-
-/** @brief emit function to generate the testdata for multi-layer models */
-int ProgGen::emit()
-{
- const string mkdir_cmd = "mkdir -p " + dir;
- const string encode_cmd = "encoder " + dir + "/program.asm " + dir + "/program.bin";
-
- if (system(mkdir_cmd.c_str()) != 0) {
- cerr << "Fail to create a directory, " << mkdir_cmd
- << ", where the result would be placed" << endl;
- return -1;
- }
-
- ofs_asm.open(dir + "/program.asm");
-
- for (size_t idx = 0; idx < ops.size(); idx++) {
- TRINITY_FMAP_PARA para_fmap_in;
- TRINITY_FMAP_PARA para_fmap_out;
- WGT_PARA para_weight;
-
- /** parse the operation and assign its parameters */
- if (!calc_tensor_data_size(ops[idx],
- para_fmap_in, para_weight, para_fmap_out)) {
- cerr << "Fail to parse a trinity operation" << endl;
- ofs_asm.close();
- return -1;
- }
-
- /** generate weight data */
- if (ops[idx].info.OPCODE < TRINITY_OPCODE_MAXP) {
- TrinityWgt trinity_weight;
-
- trinity_weight.alloc(para_weight);
- trinity_weight.gen_rand();
- trinity_weight.write_data_file(dir, "input_weight_" + to_string(idx),
- WBIN_NORMAL, true);
- }
-
- write_asm_file(idx, para_fmap_in, para_fmap_out, para_weight);
- }
-
- ofs_asm << "SAW" << endl;
- ofs_asm << "NOP" << endl;
- ofs_asm.close();
-
- if (system(encode_cmd.c_str()) != 0) {
- cerr << "Fail to encode visa program, " << encode_cmd << endl;
- return -1;
- }
-
- return 0;
-}
-
-/** @brief generate testdata for testcase1 (CONV/CONV) */
-int gen_prog_testcase1(const string base, const string name)
-{
- ProgGen gen(base + '/' + name);
-
- cout << "testcase1: CONV/CONV" << endl;
-
- /** 1) 3x3 CONV */
- TRINITY_CORE_PARA_OP conv1;
-
- conv1.info.OPCODE = TRINITY_OPCODE_CONV;
- /** the number of code bits per weight */
- conv1.cfg.WGT_QBIT = 2;
- /** the dimension of convoution filter */
- conv1.cfg.FLT_H = 3;
- conv1.cfg.FLT_W = 3;
- /** the stride of convolution window */
- conv1.cfg.STR_Y = 2;
- conv1.cfg.STR_X = 2;
- /** the amount of padding in pixels */
- conv1.cfg.PAD_L = 1;
- conv1.cfg.PAD_R = 1;
- conv1.cfg.PAD_T = 1;
- conv1.cfg.PAD_B = 1;
- /** input/output featuer map size */
- conv1.data_size.OUT_H = 16;
- conv1.data_size.OUT_W = 16;
- conv1.data_size.OUT_D = 64;
- conv1.data_size.IN0_D = 64;
-
- gen.append(conv1);
-
- /** 2) 1x1 CONV */
- TRINITY_CORE_PARA_OP conv2;
-
- conv2.info.OPCODE = TRINITY_OPCODE_CONV;
- /** the number of code bits per weight */
- conv2.cfg.WGT_QBIT = 2;
- /** the dimension of convoution filter */
- conv2.cfg.FLT_H = 1;
- conv2.cfg.FLT_W = 1;
- /** the stride of convolution window */
- conv2.cfg.STR_Y = 1;
- conv2.cfg.STR_X = 1;
- /** input/output featuer map size */
- conv2.data_size.OUT_H = 16;
- conv2.data_size.OUT_W = 16;
- conv2.data_size.OUT_D = 64;
- conv2.data_size.IN0_D = 64;
-
- gen.append(conv2);
-
- return gen.emit();
-}
-
-/** @brief generate testdata for testcase2 (CONV/ESUM/MAXP) */
-int gen_prog_testcase2(const string base, const string name)
-{
- ProgGen gen(base + '/' + name);
-
- cout << "testcase2: CONV/ESUM/MAXP" << endl;
-
- /** 1) 3x3 CONV */
- TRINITY_CORE_PARA_OP conv;
-
- conv.info.OPCODE = TRINITY_OPCODE_CONV;
- /** the number of code bits per weight */
- conv.cfg.WGT_QBIT = 2;
- /** the dimension of convoution filter */
- conv.cfg.FLT_H = 3;
- conv.cfg.FLT_W = 3;
- /** the stride of convolution window */
- conv.cfg.STR_Y = 2;
- conv.cfg.STR_X = 2;
- /** the amount of padding in pixels */
- conv.cfg.PAD_L = 1;
- conv.cfg.PAD_R = 1;
- conv.cfg.PAD_T = 1;
- conv.cfg.PAD_B = 1;
- /** input/output featuer map size */
- conv.data_size.OUT_H = 16;
- conv.data_size.OUT_W = 16;
- conv.data_size.OUT_D = 64;
- conv.data_size.IN0_D = 64;
-
- gen.append(conv);
-
- /** 2) ESUM */
- TRINITY_CORE_PARA_OP esum;
-
- esum.info.OPCODE = TRINITY_OPCODE_ESUM;
- esum.cfg.WGT_QBIT = 1;
- esum.cfg.FLT_H = 1;
- esum.cfg.FLT_W = 1;
- esum.data_size.OUT_H = 16;
- esum.data_size.OUT_W = 16;
- esum.data_size.OUT_D = 64;
- esum.data_size.IN0_D = 64;
- esum.quant.IN0_MULT = 0x1 << 10;
- esum.quant.IN1_MULT = 0x1 << 10;
- esum.quant.IN0_LSHAMT = 20;
- esum.quant.IN1_LSHAMT = 20;
-
- gen.append(esum);
-
- /** 3) MAXP */
- TRINITY_CORE_PARA_OP maxp;
-
- maxp.info.OPCODE = TRINITY_OPCODE_MAXP;
- maxp.cfg.WGT_QBIT = 1;
- maxp.cfg.FLT_H = 2;
- maxp.cfg.FLT_W = 2;
- maxp.data_size.OUT_H = 15;
- maxp.data_size.OUT_W = 15;
- maxp.data_size.OUT_D = 64;
- maxp.data_size.IN0_D = 64;
- maxp.quant.IN0_ZERO = 0;
- maxp.quant.OUT_ZERO = 0;
-
- gen.append(maxp);
-
- return gen.emit();
-}
-
-/** @brief generate testdata for testcase3 (CONVE/ReLU/AVGP) */
-int gen_prog_testcase3(const string base, const string name)
-{
- ProgGen gen(base + '/' + name);
-
- cout << "testcase3: CONVE/ReLU/AVGP" << endl;
-
- /** 1) 3x3 CONVE */
- TRINITY_CORE_PARA_OP conve;
-
- conve.info.OPCODE = TRINITY_OPCODE_CONV;
- conve.info.CNV_ESUM_EN = 1;
- /** the number of code bits per weight */
- conve.cfg.WGT_QBIT = 2;
- /** the dimension of convoution filter */
- conve.cfg.FLT_H = 3;
- conve.cfg.FLT_W = 3;
- /** the stride of convolution window */
- conve.cfg.STR_Y = 2;
- conve.cfg.STR_X = 2;
- /** the amount of padding in pixels */
- conve.cfg.PAD_L = 1;
- conve.cfg.PAD_R = 1;
- conve.cfg.PAD_T = 1;
- conve.cfg.PAD_B = 1;
- /** input/output featuer map size */
- conve.data_size.OUT_H = 16;
- conve.data_size.OUT_W = 16;
- conve.data_size.OUT_D = 64;
- conve.data_size.IN0_D = 64;
- /** quantization */
- conve.quant.IN1_ZERO = 6;
- conve.quant.IN1_LSHAMT = 20;
- conve.quant.IN1_RSHAMT = 0;
- conve.quant.IN1_MULT = 0x400;
-
- gen.append(conve);
-
- /** 2) ReLU */
- TRINITY_CORE_PARA_OP relu;
-
- relu.info.OPCODE = TRINITY_OPCODE_RELU;
- relu.cfg.WGT_QBIT = 1;
- relu.cfg.FLT_H = 1;
- relu.cfg.FLT_W = 1;
- relu.data_size.OUT_H = 16;
- relu.data_size.OUT_W = 16;
- relu.data_size.OUT_D = 64;
- relu.data_size.IN0_D = 64;
- relu.quant.IN0_ZERO = 4;
- relu.quant.OUT_ZERO = 4;
-
- gen.append(relu);
-
- /** 3) AVGP */
- TRINITY_CORE_PARA_OP avgp;
-
- avgp.info.OPCODE = TRINITY_OPCODE_AVGP;
- avgp.cfg.WGT_QBIT = 1;
- avgp.cfg.FLT_H = 2;
- avgp.cfg.FLT_W = 2;
- avgp.data_size.OUT_H = 15;
- avgp.data_size.OUT_W = 15;
- avgp.data_size.OUT_D = 64;
- avgp.data_size.IN0_D = 64;
- /** some fields have some assumed rules */
- avgp.quant.IN0_ZERO = 0;
- avgp.quant.OUT_ZERO = 0;
- avgp.quant.OUT_LSHAMT = 0;
- avgp.quant.OUT_RSHAMT = avgp_rshamts[avgp.cfg.FLT_H * avgp.cfg.FLT_W];
- avgp.quant.OUT_MULT = avgp_mults[avgp.cfg.FLT_H * avgp.cfg.FLT_W];
-
- gen.append(avgp);
-
- return gen.emit();
-}
-
-/** @brief main function */
-int main(int argc, char **argv)
-{
- if (argc != 2) {
- cerr << "Usage: " << argv[0] << " result_dir" << endl;
- return -1;
- }
-
- std::srand(time(NULL));
-
- TR_ASSERT(gen_prog_testcase1(argv[1], "testcase1") == 0);
- TR_ASSERT(gen_prog_testcase2(argv[1], "testcase2") == 0);
- TR_ASSERT(gen_prog_testcase3(argv[1], "testcase3") == 0);
-
- /** @todo add more multi-layer models */
-
- return 0;
-}
+++ /dev/null
-# Build tools
-
-libnpuvision_dep = dependency('libnpuvision')
-
-gen_visa_asm = executable('gen_visa_prog',
- 'gen_visa_prog.cpp',
- include_directories: ne_common_inc,
- dependencies : [libnpuvision_dep],
- install : false)
-
-gen_ref_data = executable('gen_ref_data',
- 'gen_ref_data.cpp',
- include_directories: ne_common_inc,
- dependencies : [libnpuvision_dep, ne_core_npu_emul_dep],
- install : false)
+++ /dev/null
-subdir('gen-testdata')