This patch adds a TRIV2 apptest that uses an npubinfmt v3 model.
It has been tested with npu-engine-testdata (1.2.0).
It also fixes some minor bugs.
Signed-off-by: Dongju Chae <dongju.chae@samsung.com>
DATA_LAYOUT_NONE = 0, /**< undefined layout, regarded as raw data */
DATA_LAYOUT_NHWC, /**< standard layout, NHWC */
DATA_LAYOUT_NCHW, /**< standard layout, NCHW */
- DATA_LAYOUT_SRNPU, /**< customized layout for SRNPU (based on NHWC) */
+ DATA_LAYOUT_TRIV, /**< customized layout for TRIV1 (based on NHWC) */
+ DATA_LAYOUT_SRNPU = DATA_LAYOUT_TRIV,
+ /**< alias for backward-compatibility */
+ DATA_LAYOUT_TRIV2, /**< customized layout for TRIV2 (based on NHWC) */
} data_layout;
/**
typedef enum {
/* 8 bits */
DATA_TYPE_SRNPU = 0, /**< integer-arithmetic-only quantization (TRIV1) */
- DATA_TYPE_QASYMM8, /**< 8-bit asymmetric quantization (TRIV2) */
+ DATA_TYPE_QASYMM8, /**< 8-bit asymmetric quantization (TRIV2) */
DATA_TYPE_INT8,
DATA_TYPE_UINT8,
/* 16 bits */
- DATA_TYPE_QSYMM16, /**< 16-bit symmetric quantization (TRIV2) */
+ DATA_TYPE_QSYMM16, /**< 16-bit symmetric quantization (TRIV2) */
DATA_TYPE_INT16,
DATA_TYPE_UINT16,
/* 32 bits */
%if 0%{?unit_test}
%define enable_npu_emul true
BuildRequires: npu-engine-emul
-BuildRequires: npu-engine-testdata
+BuildRequires: npu-engine-testdata > 1.2
# valgrind requires the same arch with host (x86_64)
%if 0%{?use_valgrind}
# Run example_visa with sample models
mkdir -p sample_models_v1
mkdir -p sample_models_v2
+ mkdir -p sample_models_v3
cp -rf /usr/share/npu-engine/testdata/npubinfmt_v1/testcase* sample_models_v1/
cp -rf /usr/share/npu-engine/testdata/npubinfmt_v2/testcase* sample_models_v2/
+ cp -rf /usr/share/npu-engine/testdata/npubinfmt_v3/* sample_models_v3/
- ./apptests/apptest_example_visa sample_models_v1 > /dev/null 2> /dev/null
- ./apptests/apptest_example_visa sample_models_v2 > /dev/null 2> /dev/null
+ ./apptests/apptest_example_visa sample_models_v1 > /dev/null
+ ./apptests/apptest_example_visa sample_models_v2 > /dev/null
+ ./apptests/apptest_example_visa_triv2 sample_models_v3 > /dev/null
rm -rf sample_models_*
/** all tensors have the buffer as the parent hwmem */
for (uint32_t i = 0; i < num_input_; i++)
- input_.push_back (new HWmem (new HWmemChunk));
+ input_.push_back (std::unique_ptr<HWmem>(new HWmem (new HWmemChunk)));
for (uint32_t i = 0; i < num_output_; i++)
- output_.push_back (new HWmem (new HWmemChunk));
+ output_.push_back (std::unique_ptr<HWmem>(new HWmem (new HWmemChunk)));
int status = 0;
if (meta != nullptr) {
HWmem *
Buffer::getInputTensor (uint32_t idx) {
if (idx < num_input_)
- return input_[idx];
+ return input_[idx].get(); /* non-owning pointer; input_ keeps ownership of the tensor */
return nullptr;
}
HWmem *
Buffer::getOutputTensor (uint32_t idx) {
if (idx < num_output_)
- return output_[idx];
+ return output_[idx].get(); /* non-owning pointer; output_ keeps ownership of the tensor */
return nullptr;
}
int setInputTensor (uint32_t idx, size_t offset, size_t size);
int setOutputTensor (uint32_t idx, size_t offset, size_t size);
- std::vector <HWmem *> input_; /**< input tensors */
- std::vector <HWmem *> output_; /**< output tensors */
+ std::vector <std::unique_ptr<HWmem>> input_; /**< input tensors */
+ std::vector <std::unique_ptr<HWmem>> output_; /**< output tensors */
uint32_t num_input_; /**< number of input tensors */
uint32_t num_output_; /**< number of output tensors */
for (uint32_t idx = 0; idx < input->num_buffers; idx++) {
auto func = std::bind (TrinityVision2::manipulateData, model, idx, true,
std::placeholders::_1, std::placeholders::_2, std::placeholders::_3);
- int status = comm_.extractGenericBuffer (&input->bufs[idx],
- segt->getInputSegment(idx)->getData(), func);
+ int status = comm_.extractGenericBuffer (
+ &input->bufs[idx],
+ segt->getInputSegment(idx)->getData() + segt->getInputSegmentOffset(idx),
+ func);
if (status != 0) {
logerr (TAG, "Failed to feed input segment: %d\n", status);
return status;
/** user needs to free this */
output.bufs[idx].addr = malloc (output_tensor_size);
- auto func = std::bind (TrinityVision::manipulateData, model, idx, false,
+ auto func = std::bind (TrinityVision2::manipulateData, model, idx, false,
std::placeholders::_1, std::placeholders::_2, std::placeholders::_3);
- int status = comm_.insertGenericBuffer (segt->getOutputSegment(idx)->getData(),
+ int status = comm_.insertGenericBuffer (
+ segt->getOutputSegment(idx)->getData() + segt->getOutputSegmentOffset(idx),
&output.bufs[idx], func);
if (status != 0) {
logerr (TAG, "Failed to return output buffer: %d\n", status);
input_config.model_id = model->getDmabuf ();
if (segt != nullptr) {
input_config.dmabuf_id = segt->getDmabuf ();
+#if 0
+ input_config.num_segments = segt->getNumTotalSegments ();
+#endif
} else {
/** some instructions do not require the segment table (e.g., nop) */
input_config.dmabuf_id = -1;
+#if 0
+ input_config.num_segments = 0;
+#endif
}
/** set constraints */
if (parent == nullptr)
return -EINVAL;
+ int dmabuf = parent->getDmabuf();
/** parent was not allocated */
- if (parent->getDmabuf() < 0)
+ if (dmabuf < 0)
return -EPERM;
/** check offset */
return -EINVAL;
/** just set the size */
+ hwmem->setDmabuf (dmabuf);
hwmem->setSize (size);
return 0;
}
virtual uint32_t getWeightSegmentIndex () const { return 0; }
virtual uint32_t getInputSegmentIndex (uint32_t idx) const { return 0; }
virtual uint32_t getOutputSegmentIndex (uint32_t idx) const { return 0; }
+ virtual uint32_t getInputSegmentOffset (uint32_t idx) const { return 0; } /* base default: always offset 0 */
+ virtual uint32_t getOutputSegmentOffset (uint32_t idx) const { return 0; } /* base default: always offset 0 */
uint64_t getSize () const { return meta_->size; }
uint64_t getProgramSize () const { return meta_->program_size; }
assert (idx < getOutputNum ());
return meta_->output_seg_idx[idx];
}
+ uint32_t getInputSegmentOffset (uint32_t idx) const override { /* offset stored in the metadata for input idx */
+ assert (idx < getInputNum ()); /* an out-of-range idx is treated as a programming error */
+ return meta_->input_seg_off[idx];
+ }
+ uint32_t getOutputSegmentOffset (uint32_t idx) const override { /* offset stored in the metadata for output idx */
+ assert (idx < getOutputNum ()); /* an out-of-range idx is treated as a programming error */
+ return meta_->output_seg_off[idx];
+ }
private:
Metadata_v3 (npubin_meta *meta);
void
SegmentTable::setSegmentSlot (HWmem *hwmem, int slot)
{
- segments_.push_back (hwmem);
+ segments_.push_back (std::unique_ptr<HWmem>(hwmem)); /* segments_ takes ownership of hwmem */
/**
- * Set its dmabuf FD to each slot in the segment table. The kernel driver has
- * a responsibility to convert these dmabuf FDs to actual physical addresses,
- * accessed by NPU hardware, before invoking an inference. Note that each entry
- * in segmen table has 64-bits slot.
+ * The segment table is expected to be a single page in size (i.e., 4KB).
+ * As this user library cannot find out the physical address of each segment,
+ * it just stores the dmabuf FD and offset that describe the base address of
+ * each segment. The first half (2KB) of the segment table contains the dmabuf
+ * FDs, and the second half (2KB) contains their offsets. As the maximum number
+ * of segments is 256, a 4KB-sized segment table is enough to hold this
+ * information.
+ *
+ * The kernel driver is responsible for converting these dmabuf FDs/offsets
+ * to actual physical addresses, accessed by NPU hardware, before invoking
+ * an inference. Note that each entry in the segment table is a 32-bit slot.
*/
- unsigned int unsigned_dmabuf = hwmem->getDmabuf ();
- reinterpret_cast<uint64_t *>(getData())[slot] = unsigned_dmabuf;
+ reinterpret_cast<int32_t *>(getData())[slot] = hwmem->getDmabuf (); /* first half: dmabuf FD of this segment */
+ reinterpret_cast<uint32_t *>(getData() + getSize()/2)[slot] = hwmem->getOffset (); /* second half: offset of this segment */
}
/**
return -EINVAL;
}
+ /** we need to consider the scenario that users provide input/weight as external dmabuf */
+ for (uint32_t i = 0; i < meta->getSegmentsNum (); i++) {
+ uint32_t size = meta->getSegmentSize (i);
+ HWmem * hwmem = nullptr;
+
+ /** check weight segment first */
+ if (meta->getWeightSize () != 0 && i == meta->getWeightSegmentIndex ()) {
+ if (meta->getWeightSize () > size) {
+ logerr (TAG, "The size of weight (%u) is larger than segment size (%u)\n",
+ meta->getWeightSize (), size);
+ segments_.clear ();
+ return -EINVAL;
+ }
+
+ /** hwmem for weight segment is the back portion of model hwmem */
+ if (model->isExternal ()) {
+ hwmem = new HWmem (new HWmemExternal);
+ hwmem->setDriverAPI (getDriverAPI ());
+ hwmem->setDmabuf (model->getDmabuf ());
+ /** offset of weight base addr */
+ hwmem->setOffset (model->getOffset () + meta->getMetaSize () + meta->getProgramSize ());
+ hwmem->setSize (size);
+ } else {
+ hwmem = new HWmem (new HWmemChunk);
+ hwmem->setParent (const_cast<Model *>(model));
+ hwmem->setOffset (meta->getMetaSize () + meta->getProgramSize ());
+
+ int status = hwmem->alloc (size);
+ if (status != 0) {
+ logerr (TAG, "Failed to allocate %uth segment with size %u: %d\n",
+ i, size, status);
+ segments_.clear ();
+ return status;
+ }
+ }
+ } else { /** check input segments */
+ for (uint32_t j = 0; j < meta->getInputNum (); j++) {
+ /** user provides a external segment */
+ if (i == meta->getInputSegmentIndex (j)) {
+ if (input->bufs[j].type == BUFFER_DMABUF) {
+ hwmem = new HWmem (new HWmemExternal);
+ hwmem->setDriverAPI (getDriverAPI ());
+ hwmem->setDmabuf (input->bufs[j].dmabuf);
+ hwmem->setOffset (input->bufs[j].offset);
+ hwmem->setSize (input->bufs[j].size);
+ break;
+ }
+ }
+ }
+
+ if (hwmem == nullptr) {
+ hwmem = new HWmem (new HWmemDevice);
+ hwmem->setDriverAPI (getDriverAPI ());
+
+ int status = hwmem->alloc (size);
+ if (status != 0) {
+ logerr (TAG, "Failed to allocate %uth segment with size %u: %d\n",
+ i, size, status);
+ segments_.clear ();
+ return status;
+ }
+ }
+ }
+
+ setSegmentSlot (hwmem, i);
+ }
+
+ /** set invalid dmabuf FDs for unused segt slots */
+ for (uint32_t i = meta->getSegmentsNum (); i < MAX_SEGMENTS; i++)
+ reinterpret_cast<int32_t *>(getData())[i] = -1;
+
/** segment index validity is already checked in Metadata's checkSanity () */
num_total_segments_ = meta->getSegmentsNum ();
num_weight_segments_ = 1;
input_seg_idx_ = new uint32_t [num_input_segments_];
output_seg_idx_ = new uint32_t [num_output_segments_];
+ input_seg_off_ = new uint32_t [num_input_segments_];
+ output_seg_off_ = new uint32_t [num_output_segments_];
+
weight_seg_idx_[0] = meta->getWeightSegmentIndex ();
- for (uint32_t i = 0; i < num_input_segments_; i++)
+ for (uint32_t i = 0; i < num_input_segments_; i++) {
input_seg_idx_[i] = meta->getInputSegmentIndex (i);
- for (uint32_t i = 0; i < num_output_segments_; i++)
+ input_seg_off_[i] = meta->getInputSegmentOffset (i);
+ }
+ for (uint32_t i = 0; i < num_output_segments_; i++) {
output_seg_idx_[i] = meta->getOutputSegmentIndex (i);
-
- /** we need to consider the scenario that users provide input/weight as external dmabuf */
- for (uint32_t i = 0; i < meta->getSegmentsNum (); i++) {
- uint32_t size = meta->getSegmentSize (i);
- HWmem * hwmem;
-
- /** check model weight */
- if (i == weight_seg_idx_[0] && model->isExternal ()) {
- hwmem = new HWmem (new HWmemExternal);
- hwmem->setDriverAPI (getDriverAPI ());
- hwmem->setDmabuf (model->getDmabuf ());
- /** offset of weight base addr */
- hwmem->setOffset (model->getOffset () + meta->getMetaSize () + meta->getProgramSize ());
- hwmem->setSize (meta->getWeightSize ());
-
- setSegmentSlot (hwmem, i);
- continue;
- }
-
- /** check input */
- bool is_set = false;
- for (uint32_t j = 0; j < num_input_segments_; j++) {
- /** user provides a external segment */
- if (i == input_seg_idx_[j]) {
- if (input->bufs[j].type == BUFFER_DMABUF) {
- hwmem = new HWmem (new HWmemExternal);
- hwmem->setDriverAPI (getDriverAPI ());
- hwmem->setDmabuf (input->bufs[j].dmabuf);
- hwmem->setOffset (input->bufs[j].offset);
- hwmem->setSize (input->bufs[j].size);
-
- setSegmentSlot (hwmem, i);
- is_set = true;
- break;
- }
- }
- }
-
- if (is_set == false) {
- hwmem = new HWmem (new HWmemDevice);
- hwmem->setDriverAPI (getDriverAPI ());
-
- int status = hwmem->alloc (size);
- if (status != 0) {
- logerr (TAG, "Failed to allocate %uth segment with size %u: %d\n",
- i, size, status);
- segments_.clear ();
- delete [] weight_seg_idx_;
- delete [] input_seg_idx_;
- delete [] output_seg_idx_;
- return -EINVAL;
- }
-
- setSegmentSlot (hwmem, i);
- }
+ output_seg_off_[i] = meta->getOutputSegmentOffset (i);
}
return 0;
uint32_t seg_idx = weight_seg_idx_[idx];
assert (seg_idx < segments_.size ()); /** this is ensured in checkSanity() */
- return segments_ [seg_idx];
+ return segments_ [seg_idx].get();
}
/**
uint32_t seg_idx = input_seg_idx_[idx];
assert (seg_idx < segments_.size ()); /** this is ensured in checkSanity() */
- return segments_ [seg_idx];
+ return segments_ [seg_idx].get();
}
/**
uint32_t seg_idx = output_seg_idx_[idx];
assert (seg_idx < segments_.size ()); /** this is ensured in checkSanity() */
- return segments_ [seg_idx];
+ return segments_ [seg_idx].get();
+}
+
+/**
+ * @brief look up the offset recorded for an input segment
+ * @param[in] idx position in the list of input segments; it should be less
+ *                than the number of input segments
+ * @return the offset stored for that input segment. 0 is also returned, after
+ *         logging an error, when the input segments are not set up yet or
+ *         idx is out of range.
+ */
+uint32_t
+SegmentTable::getInputSegmentOffset (uint32_t idx)
+{
+  uint32_t offset = 0;
+
+  if (input_seg_idx_ == nullptr) {
+    /** the input segment arrays were never filled in */
+    logerr (TAG, "No valid segments in this table, maybe uninitialized?\n");
+  } else if (num_input_segments_ <= idx) {
+    logerr (TAG, "Invalid input segment index (%u). Should be less than %u\n",
+        idx, num_input_segments_);
+  } else {
+    offset = input_seg_off_ [idx];
+  }
+
+  return offset;
+}
+
+/**
+ * @brief look up the offset recorded for an output segment
+ * @param[in] idx position in the list of output segments; it should be less
+ *                than the number of output segments
+ * @return the offset stored for that output segment. 0 is also returned, after
+ *         logging an error, when the output segments are not set up yet or
+ *         idx is out of range.
+ */
+uint32_t
+SegmentTable::getOutputSegmentOffset (uint32_t idx)
+{
+  uint32_t offset = 0;
+
+  if (output_seg_idx_ == nullptr) {
+    /** the output segment arrays were never filled in */
+    logerr (TAG, "No valid segments in this table, maybe uninitialized?\n");
+  } else if (num_output_segments_ <= idx) {
+    logerr (TAG, "Invalid output segment index (%u). Should be less than %u\n",
+        idx, num_output_segments_);
+  } else {
+    offset = output_seg_off_ [idx];
+  }
+
+  return offset;
}
HWmem *getInputSegment (uint32_t idx);
HWmem *getOutputSegment (uint32_t idx);
+ uint32_t getInputSegmentOffset (uint32_t idx);
+ uint32_t getOutputSegmentOffset (uint32_t idx);
+
uint32_t getNumTotalSegments () const { return num_total_segments_; }
uint32_t getNumWeightSegments () const { return num_weight_segments_; }
uint32_t getNumInputSegments () const { return num_input_segments_; }
private:
void setSegmentSlot (HWmem *hwmem, int slot);
- std::vector <HWmem *> segments_; /**< segments */
+ std::vector <std::unique_ptr<HWmem>> segments_; /**< segments */
uint32_t * weight_seg_idx_; /**< segment index for weight */
uint32_t * input_seg_idx_; /**< segment index for input tensors */
uint32_t * output_seg_idx_; /**< segment index for output tensors */
+ uint32_t * input_seg_off_; /**< segment offset for input tensors */
+ uint32_t * output_seg_off_; /**< segment offset for output tensors */
+
uint32_t num_total_segments_; /**< number of total segments */
uint32_t num_weight_segments_; /**< number of weight segments */
uint32_t num_input_segments_; /**< number of input segments */
#include <memory>
#include <NPUemul.h>
+#include <npubinfmt.h>
#define MAX_EMUL_DEVICES (100)
if (input_config == nullptr)
return -EINVAL;
- /** TODO: dmabuf/model_id == dmabuf_fd */
-
EmulElement *elem_model = elem_map_.find (input_config->model_id);
if (elem_model == nullptr || elem_model->getAddr () == nullptr)
return -EINVAL;
+ /** either buffer or segment table */
+ EmulElement *elem_input = elem_map_.find (input_config->dmabuf_id);
+ if (elem_input == nullptr || elem_input->getAddr () == nullptr)
+ /** nullptr is okay; regard it as NOPs */
+ return 0;
+
+ char * addr_model = static_cast<char *>(elem_model->getAddr ());
+ char * addr_input = static_cast<char *>(elem_input->getAddr ());
+
/**
* call NPU C-emulation codes (AIP/NPU_SystemService_Emulator)
- * TODO: TRIV2 is not working until its emulation code is implemented
*/
if ((dev_type_ & DEVICETYPE_MASK) == DEVICETYPE_TRIV) {
- /** buffer can be nullptr */
- EmulElement *elem_buffer = elem_map_.find (input_config->dmabuf_id);
+ run_triv_emul (addr_model, addr_input);
+ } else if ((dev_type_ & DEVICETYPE_MASK) == DEVICETYPE_TRIV2) {
+#if 0
+ /** TODO: need extra variable in input_config */
+ uint32_t num_segs = input_config->num_segments;
+#else
+ uint32_t num_segs = MAX_SEGMENTS; /* we don't know exact # segments, but can find */
+#endif
+ char ** segment_table = new char* [num_segs];
+ /** set virtual address here */
+ for (uint32_t i = 0; i < num_segs; i++) {
+ int32_t dmabuf = reinterpret_cast<int32_t *>(addr_input)[i];
+ uint32_t offset = reinterpret_cast<uint32_t *>(addr_input + elem_input->getSize ()/2)[i];
+
+ /** TODO revise this error handling when 'input_config->num_segments' is added */
+ EmulElement *elem = elem_map_.find (dmabuf);
+ if (elem == nullptr) {
+ num_segs = i;
+ break;
+ }
+
+ /** Here, set virtual address for emulations */
+ segment_table[i] = static_cast<char *>(elem->getAddr ()) + offset;
+ }
- run_npu_emul (static_cast<char*>(elem_model->getAddr ()),
- elem_buffer != nullptr ? static_cast<char*>(elem_buffer->getAddr()) : nullptr);
+ run_triv2_emul (addr_model, segment_table, num_segs);
+ delete [] segment_table;
}
+
return 0;
}
-
int prepare_model () {
npubin_meta meta;
+ memset (&meta, '\x00', sizeof(npubin_meta));
+
meta.magiccode = NPUBIN_MAGICCODE | 0x3; /** npubinfmt v3 for TRIV2 */
meta.program_size = program_size;
meta.weight_size = weight_size;
meta.input_seg_num = 1;
meta.input_seg_idx[0] = 1;
+ meta.input_seg_off[0] = 0;
meta.input_seg_dims[0][0] = 1;
meta.input_seg_dims[0][1] = 1;
meta.input_seg_dims[0][2] = 1;
meta.output_seg_num = 1;
meta.output_seg_idx[0] = 1;
+ meta.output_seg_off[0] = 0;
meta.output_seg_dims[0][0] = 1;
meta.output_seg_dims[0][1] = 1;
meta.output_seg_dims[0][2] = 1;
--- /dev/null
+/**
+ * Proprietary
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+/**
+ * @file example_visa_triv2.c
+ * @date 14 May 2020
+ * @brief AppTest to test example visa binaries (triv2/npubinfmt v3)
+ * @author Dongju Chae <dongju.chae@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <libnpuhost.h>
+#include <npubinfmt.h>
+#include <ne_test_utils.h>
+
+#define MAX_FILE_LEN 256
+#define NPU_MODEL_NAME "model.tvn"
+
+/**
+ * @brief compare an output tensor with the golden data stored in a file
+ * @param[in] golden_path path of the file holding the expected bytes
+ * @param[in] output_data output tensor to verify
+ * @param[in] output_size size of output_data in bytes
+ * @return 0 if every byte of the golden file matches the output tensor.
+ *         -1 if a byte differs or the golden file is longer than output_size.
+ *         A negative errno value if the golden file cannot be opened.
+ * @note the comparison stops at the end of the golden file; a golden file
+ *       shorter than output_size is not reported as an error.
+ */
+static int
+compare_data (const char *golden_path, const char *output_data,
+    uint64_t output_size)
+{
+  FILE *fp;
+  uint64_t idx = 0;
+  int err = 0;
+  int c;
+
+  /* the golden data is raw binary, so open it in binary mode */
+  fp = fopen (golden_path, "rb");
+  if (!fp) {
+    /* keep errno before fprintf() gets a chance to overwrite it */
+    int saved_errno = errno;
+
+    fprintf (stderr, "Fail to open %s\n", golden_path);
+    /* never report success when the file could not be opened */
+    return saved_errno != 0 ? -saved_errno : -1;
+  }
+
+  while ((c = fgetc (fp)) != EOF) {
+    uint8_t expected = (uint8_t) c;
+    uint8_t actual;
+
+    if (idx >= output_size) {
+      fprintf (stderr, "Out-of-range!\n");
+      err = -1;
+      break;
+    }
+
+    /* compare as unsigned bytes so that the diagnostics are not sign-extended */
+    actual = (uint8_t) output_data[idx];
+    if (expected != actual) {
+      fprintf (stderr, "Hex diff at %" PRIu64 "; %#1x vs. %#1x\n",
+          idx, (unsigned int) expected, (unsigned int) actual);
+      err = -1;
+      break;
+    }
+    idx++;
+  }
+
+  fclose (fp);
+  return err;
+}
+
+/**
+ * @brief compare every output buffer with its golden file and release them
+ * @param[in] meta model metadata (currently unused)
+ * @param[in] base_path directory holding the test cases
+ * @param[in] target name of the test case directory under base_path
+ * @param[in,out] output output buffers returned by the inference; all of them
+ *                are freed here, regardless of the comparison result
+ * @return 0 if all buffers match. Otherwise, the error of the first failure.
+ */
+static int
+compare_output_buffers (const npubin_meta *meta, const char *base_path,
+    const char *target, output_buffers *output)
+{
+  char golden_path[MAX_FILE_LEN];
+  int err = 0;
+  uint32_t idx;
+
+  (void) meta;
+
+  for (idx = 0; idx < output->num_buffers; idx++) {
+    assert (output->bufs[idx].type == BUFFER_MAPPED);
+
+    /* once a comparison fails, skip the remaining ones but keep freeing */
+    if (err == 0) {
+      snprintf (golden_path, MAX_FILE_LEN, "%s/%s/output_fmap_%" PRIu32 ".bin",
+          base_path, target, idx);
+      err = compare_data (golden_path, (const char *) output->bufs[idx].addr,
+          output->bufs[idx].size);
+    }
+
+    /* every output buffer is owned by the caller side and must be released */
+    free (output->bufs[idx].addr);
+    output->bufs[idx].addr = NULL;
+  }
+
+  return err;
+}
+
+/**
+ * @brief report a mismatch between the allocated and freed memory totals
+ * @param[in] dev the NPU device handle to query
+ * @note nothing is printed when the memory status cannot be obtained
+ */
+static void
+check_memory_leak (npudev_h dev)
+{
+  size_t total_alloc = 0;
+  size_t total_free = 0;
+
+  if (getNPU_memoryStatus (dev, &total_alloc, &total_free) != 0)
+    return;
+
+  if (total_alloc == total_free)
+    return;
+
+  fprintf (stderr, "memory leak check failed (0x%zx != 0x%zx)\n",
+      total_free, total_alloc);
+}
+
+/**
+ * @brief run inference for each target visa binary (on sync mode)
+ * @param[in] dev the NPU device handle
+ * @param[in] base_path directory holding the test cases
+ * @param[in] target name of the test case directory under base_path
+ * @return 0 if the inference succeeds and all outputs match the golden data.
+ *         Otherwise, a non-zero error code.
+ */
+static int
+run_inference_each (npudev_h dev, const char *base_path, const char *target)
+{
+  generic_buffer model;
+  input_buffers input;
+  output_buffers output;
+
+  char model_path[MAX_FILE_LEN];
+  char input_path[MAX_TENSORS][MAX_FILE_LEN];
+  off_t input_size;
+
+  npuConstraint constraint;
+  npubin_meta *meta;
+  uint32_t model_id;
+  uint32_t idx;
+  int err = 0;
+
+  /** 1: setup model (not dmabuf) */
+  memset (model_path, '\x00', MAX_FILE_LEN);
+  snprintf (model_path, MAX_FILE_LEN, "%s/%s/%s",
+      base_path, target, NPU_MODEL_NAME);
+
+  meta = getNPUmodel_metadata (model_path, false);
+  if (meta == NULL) {
+    fprintf (stderr, "Fail to get the metadata of %s\n", model_path);
+    return -EINVAL;
+  }
+
+  model.size = get_file_size (model_path);
+  model.filepath = model_path;
+  model.type = BUFFER_FILE;
+
+  /** 2: setup input buffers */
+  if (NPUBIN_VERSION (meta->magiccode) != 3) {
+    fprintf (stderr, "Support only npubinfmt v3\n");
+    err = -EINVAL;
+    goto out_free_meta;
+  }
+
+  /* input_path[] holds at most MAX_TENSORS entries; reject anything beyond */
+  if (meta->input_seg_num > MAX_TENSORS) {
+    fprintf (stderr, "Too many input tensors (%" PRIu32 ")\n",
+        (uint32_t) meta->input_seg_num);
+    err = -EINVAL;
+    goto out_free_meta;
+  }
+
+  for (idx = 0; idx < meta->input_seg_num; idx++) {
+    memset (input_path[idx], '\x00', MAX_FILE_LEN);
+    snprintf (input_path[idx], MAX_FILE_LEN, "%s/%s/input_fmap_%" PRIu32 ".bin",
+        base_path, target, idx);
+
+    input_size = get_file_size (input_path[idx]);
+    if (input_size <= 0) {
+      fprintf (stderr, "Wrong metadata; need %" PRIu32 " input tensors\n",
+          (uint32_t) meta->input_seg_num);
+      err = -EINVAL;
+      goto out_free_meta;
+    }
+
+    input.bufs[idx].size = input_size;
+    input.bufs[idx].type = BUFFER_FILE;
+    input.bufs[idx].filepath = input_path[idx];
+  }
+
+  input.num_buffers = meta->input_seg_num;
+
+  /** 3: allocate input buffers (it's not mandatory to use this API) */
+  if ((err = allocNPU_inputBuffers (dev, &input)) != 0) {
+    fprintf (stderr, "Fail to allocate NPU input buffer (errno %d)\n", err);
+    goto out_free_meta;
+  }
+
+  /** 4: register the model to NPU Engine */
+  if ((err = registerNPUmodel (dev, &model, &model_id)) != 0) {
+    fprintf (stderr, "Fail to registerNPU model (errno %d)\n", err);
+    goto out_clean;
+  }
+
+  /** 4-1: provide additional information to NPU Engine */
+  {
+    tensors_data_info info_in;
+    tensors_data_info info_out;
+
+    /* No data manipulation & quantization in this test */
+
+    info_in.num_info = meta->input_seg_num;
+    for (idx = 0; idx < info_in.num_info; idx++) {
+      info_in.info[idx].layout = DATA_LAYOUT_TRIV2;
+      info_in.info[idx].type = DATA_TYPE_QASYMM8;
+    }
+
+    info_out.num_info = meta->output_seg_num;
+    for (idx = 0; idx < info_out.num_info; idx++) {
+      info_out.info[idx].layout = DATA_LAYOUT_TRIV2;
+      info_out.info[idx].type = DATA_TYPE_QASYMM8;
+    }
+
+    if ((err = setNPU_dataInfo (dev, model_id, &info_in, &info_out)) != 0) {
+      fprintf (stderr, "Failed to set the information for NPU data\n");
+      goto out_unregister;
+    }
+  }
+
+  /** 4-2: set constraints */
+  constraint.timeout_ms = 100;
+  constraint.priority = NPU_PRIORITY_MID;
+  if ((err = setNPU_constraint (dev, model_id, constraint)) != 0)
+    goto out_unregister;
+
+  /** 5: run NPU inference (sync) */
+  if ((err = runNPU_sync (dev, model_id, &input, &output)) != 0)
+    goto out_unregister;
+
+  /** 6: compare output buffers */
+  err = compare_output_buffers (meta, base_path, target, &output);
+
+  /* the labels below undo the setup steps in reverse order */
+out_unregister:
+  unregisterNPUmodel (dev, model_id);
+out_clean:
+  cleanNPU_inputBuffers (dev, &input);
+out_free_meta:
+  free (meta);
+
+  /** ensure that all memory (allocated by libnpuhost APIs) are successfully freed */
+  check_memory_leak (dev);
+
+  return err;
+}
+
+/**
+ * @brief apptest main
+ * @param[in] argc number of command-line arguments; exactly 2 are expected
+ * @param[in] argv command-line arguments, handed over to run_apptest()
+ * @return 0 if the test is skipped because of the argument count, -1 if no
+ *         TRIV2 device is available. Otherwise, the result of run_apptest().
+ */
+int
+main (int argc, char **argv)
+{
+  dev_type type = NPUCOND_TRIV2_CONN_SOCIP;
+  npudev_h dev;
+
+  /* a wrong number of arguments skips the test instead of failing it */
+  if (argc != 2) {
+    fprintf (stderr, "[APPTEST] %s: SKIPPED\n", argv[0]);
+    return 0;
+  }
+
+  /* the device is opened only when at least one TRIV2 device is reported */
+  if (getnumNPUdeviceByType (type) <= 0 ||
+      getNPUdeviceByType (&dev, type, 0) != 0) {
+    fprintf(stderr, "No available TRIV2 device\n");
+    return -1;
+  }
+
+  return run_apptest (dev, argv, &run_inference_each);
+}
install_dir : join_paths(ne_bindir, 'apptests')
)
+executable ('apptest_example_visa_triv2',
+ 'example_visa_triv2.c',
+ include_directories : ne_apptest_inc,
+ dependencies : ne_test_utils_dep,
+ link_with : ne_library_shared,
+ install : true,
+ install_rpath : ne_libdir,
+ install_dir : join_paths(ne_bindir, 'apptests')
+)
+
## TRIA (Trinity Asr): disabled until the TRIA driver is prepared
#executable ('apptest_dummy_inference_asr',
# 'dummy_inference_asr.c',
ASSERT_NE (api.get(), nullptr);
std::unique_ptr<Model> model (new Model (new HWmemDevice));
+ model->setDriverAPI (api.get ());
npubin_meta meta;
create_metadata_v3 (meta);
ASSERT_EQ (model->setMetadata (&meta), 0);
+ ASSERT_EQ (model->alloc (meta.size), 0);
std::unique_ptr<SegmentTable> table (new SegmentTable (new HWmemDevice));
table->setDriverAPI (api.get ());
ASSERT_NE (api.get(), nullptr);
std::unique_ptr<Model> model (new Model (new HWmemDevice));
+ model->setDriverAPI (api.get ());
npubin_meta meta;
create_metadata_v3 (meta);
ASSERT_EQ (model->setMetadata (&meta), 0);
+ ASSERT_EQ (model->alloc (meta.size), 0);
std::unique_ptr<SegmentTable> table (new SegmentTable (new HWmemDevice));
table->setDriverAPI (api.get ());
ASSERT_NE (api.get(), nullptr);
std::unique_ptr<Model> model (new Model (new HWmemDevice));
+ model->setDriverAPI (api.get ());
npubin_meta meta;
create_metadata_v3 (meta);
ASSERT_EQ (model->setMetadata (&meta), 0);
+ ASSERT_EQ (model->alloc (meta.size), 0);
std::unique_ptr<SegmentTable> table (new SegmentTable (new HWmemDevice));
/** before allocating segment table */
EXPECT_NE (table->createSegments (model.get(), &input), 0);
+ table.reset (new SegmentTable (new HWmemDevice));
+ table->setDriverAPI (api.get ());
ASSERT_EQ (table->alloc (), 0);
/** invalid metadata (only support v3) */
meta.magiccode = NPUBIN_MAGICCODE | 0x2; /* v2 */