/** @brief Dummy callback handler used to emulate a synchronous run(). */
class callbackSync {
public:
  callbackSync () : done_ (false) {}

  /**
   * @brief C-style trampoline registered with the device.
   * @param output output buffers delivered by the device
   * @param req_id request id of the finished inference
   * @param data opaque pointer; must be the owning callbackSync instance
   *
   * Fix: the previous version cast @data but never forwarded the call,
   * so done_ was never set and the waiter was never notified.
   */
  static void callback (output_buffers *output, int req_id, void *data) {
    callbackSync *sync = static_cast<callbackSync *> (data);
    sync->callback (output, req_id);
  }

  /** @brief Marks the request complete and wakes up the waiter. */
  void callback (output_buffers *output, int req_id) {
    /* done_ must be flipped under m_; otherwise a waiter can check the
     * predicate, see false, and block right after notify_one() fires
     * (missed wakeup). */
    std::lock_guard<std::mutex> lock (m_);
    done_ = true;
    cv_.notify_one ();
  }

  /** @brief Blocks the caller until callback() has been invoked. */
  void wait () {
    std::unique_lock<std::mutex> lock (m_);
    cv_.wait (lock, [this] { return done_; });
  }

private:
  std::mutex m_;              /**< guards done_ */
  std::condition_variable cv_; /**< signalled once on completion */
  bool done_;                  /**< completion flag, set by callback() */
};
/* NOTE(review): this span reads as a raw diff hunk — interleaved '-'/'+'
 * lines, apparently from more than one hunk of the same function. The
 * comments below annotate the resulting ('+' and context) code path. */
switch (mode) {
case NPU_INFER_BLOCKING: {
/* blocking path: completion is signalled through a stack-local callbackSync */
- callbackSync sync (output);
+ callbackSync sync;
req_id =
device_->run (NPUINPUT_HOST, model, input, output,
callbackSync::callback, static_cast<void *> (&sync));
/* NOTE(review): nothing visible in this case waits on `sync` after run();
 * presumably the wait happens outside this view — confirm the blocking
 * semantics are actually enforced. */
Request *req = new Request (opmode);
req->setModel (model);
req->setInferData (segt);
+ req->setOutputBuffers (output);
req->setCallback (
std::bind (&TrinityVision2::callback, this, req, cb, cb_data));
/** internal logic error */
assert (segt != nullptr);
- output_buffers output = {.num_buffers = segt->getNumOutputSegments ()};
+ output_buffers *output = req->getOutputBuffers ();
+ /* user didn't provide valid output buffers. So, pass internally allocated one */
+ if (output == nullptr) {
+ output = new output_buffers;
+ memset (output, '\x00', sizeof (output_buffers));
+ }
+ if (output->num_buffers == 0)
+ output->num_buffers = segt->getNumOutputSegments ();
- for (uint32_t idx = 0; idx < output.num_buffers; idx++) {
+ /* perform data manipulation such as layout conversion and de-quantization */
+ for (uint32_t idx = 0; idx < output->num_buffers; idx++) {
uint32_t output_tensor_size = model->getOutputTensorSize (idx);
HWmem *output_segment = segt->getOutputSegment (idx);
- if (output_segment->isExternal ()) {
- output.bufs[idx].type = BUFFER_DMABUF;
- output.bufs[idx].size = output_segment->getSize ();
- output.bufs[idx].addr = output_segment->getData ();
- output.bufs[idx].dmabuf = output_segment->getDmabuf ();
- output.bufs[idx].offset = output_segment->getOffset ();
- } else {
- output.bufs[idx].type = BUFFER_MAPPED;
- output.bufs[idx].size = output_tensor_size;
+ if (output->bufs[idx].type == BUFFER_DMABUF) {
+ /* it's external memory. can't do */
+ continue;
+ } else if (output->bufs[idx].addr == nullptr ||
+ output->bufs[idx].size == 0) {
+ output->bufs[idx].type = BUFFER_MAPPED;
+ output->bufs[idx].size = output_tensor_size;
/** user needs to free this */
- output.bufs[idx].addr = calloc (1, output_tensor_size);
- if (output.bufs[idx].addr == NULL) {
+ output->bufs[idx].addr = calloc (1, output_tensor_size);
+ if (output->bufs[idx].addr == NULL) {
logerr (TAG, "Unable to allocate output buffer\n");
/* NOTE(review): buffers calloc'd on earlier iterations stay recorded in
 * output->bufs[]; ownership on this early-exit path is unclear — verify
 * the user callback still receives and frees them. */
break;
}
+ }
- auto func = std::bind (TrinityVision2::manipulateData, model, idx, false,
- std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3);
- void *dst;
+ auto func = std::bind (TrinityVision2::manipulateData, model, idx, false,
+ std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3);
+ void *dst;
#ifdef __FPGA__
- /* this is fpga workaround codes for syncing output data */
- dst = calloc (1, output_tensor_size);
- if (dst == NULL) {
- logerr (TAG, "Unable to allocate FPGA temp buffer\n");
- break;
- }
- api_->fpga_memcpy (output_segment->getDmabuf (),
- segt->getOutputSegmentOffset (idx), dst,
- output_tensor_size);
+ /* this is fpga workaround codes for syncing output data */
+ dst = calloc (1, output_tensor_size);
+ if (dst == NULL) {
+ logerr (TAG, "Unable to allocate FPGA temp buffer\n");
+ break;
+ }
+ api_->fpga_memcpy (output_segment->getDmabuf (),
+ segt->getOutputSegmentOffset (idx), dst,
+ output_tensor_size);
#else
- dst = output_segment->getData () + segt->getOutputSegmentOffset (idx);
+ dst = output_segment->getData () + segt->getOutputSegmentOffset (idx);
#endif
- int status = comm_.insertGenericBuffer (dst, &output.bufs[idx], func);
- if (status != 0) {
- logerr (TAG, "Failed to return output buffer: %d\n", status);
- }
+ int status = comm_.insertGenericBuffer (dst, &output->bufs[idx], func);
+ if (status != 0) {
+ logerr (TAG, "Failed to return output buffer: %d\n", status);
+ }
+
#ifdef __FPGA__
- free (dst);
+ free (dst);
#endif
- }
}
/* deliver results to the user callback; if `output` is the internally
 * allocated fallback (req still holds nullptr), free the struct afterwards.
 * The calloc'd bufs[].addr members are the user's to free, per comment above. */
- cb (&output, req->getID (), cb_data);
+ cb (output, req->getID (), cb_data);
+ if (req->getOutputBuffers () != output)
+ delete output;
delete segt;
/* NOTE(review): no `break;` is visible before the end of this view — the
 * switch tail lies outside the hunk; confirm this case does not fall through. */
}
case NPUMGR_TENSOR_FMT_NHWC:
layout = DATA_LAYOUT_NHWC;
break;
- case NPUMGR_TENSOR_FMT_TRIV2:
- layout = DATA_LAYOUT_TRIV2;
- break;
default:
break;
}
return type;
}
+static gsize
+calc_data_size (const npumgr_tensor_data_t type) {
+ switch (type) {
+ case NPUMGR_TENSOR_DATA_FLOAT32:
+ return 4;
+ case NPUMGR_TENSOR_DATA_FLOAT16:
+ return 2;
+ case NPUMGR_TENSOR_DATA_INT8:
+ return 1;
+ case NPUMGR_TENSOR_DATA_UINT8:
+ return 1;
+ case NPUMGR_TENSOR_DATA_INT16:
+ return 2;
+ case NPUMGR_TENSOR_DATA_UINT16:
+ return 2;
+ case NPUMGR_TENSOR_DATA_UINT24:
+ return 3;
+ default:
+ return 0;
+ }
+}
+
/**
* @brief Class for triv2 npumgr network
*/
/** @brief Dimension @j of input tensor @i, read from the cached model metadata. */
guint getInTensorDim (guint i, guint j) {
return meta_->input_seg_dims[i][j];
}
- guint getInTensorSize (guint i) {
+ guint getInTensorSize (guint i, npumgr_tensor_fmt_t fmt,
+ npumgr_tensor_data_t type) {
guint size;
- int status;
- status = getNPUmodel_tensorSize (dev_, model_id_, true, i, &size);
- if (status != 0)
- return 0;
+ if (fmt == NPUMGR_TENSOR_FMT_NHWC || fmt == NPUMGR_TENSOR_FMT_NCHW) {
+ size = calc_data_size (type);
+ for (int j = 0; j < MAX_RANK; j++) size *= meta_->input_seg_dims[i][j];
+ } else {
+ int status;
+ status = getNPUmodel_tensorSize (dev_, model_id_, true, i, &size);
+ if (status != 0)
+ return 0;
+ }
return size;
}
/** @brief Quantization zero value of input tensor @i, from model metadata. */
int32_t getInTensorQuantZero (guint i) { return meta_->input_seg_quant_z[i]; }
/** @brief Dimension @j of output tensor @i, read from the cached model metadata. */
guint getOutTensorDim (gint i, gint j) {
return meta_->output_seg_dims[i][j];
}
- guint getOutTensorSize (guint i) {
+ guint getOutTensorSize (guint i, npumgr_tensor_fmt_t fmt,
+ npumgr_tensor_data_t type) {
guint size;
- int status;
- status = getNPUmodel_tensorSize (dev_, model_id_, false, i, &size);
- if (status != 0)
- return 0;
+ if (fmt == NPUMGR_TENSOR_FMT_NHWC || fmt == NPUMGR_TENSOR_FMT_NCHW) {
+ size = calc_data_size (type);
+ for (int j = 0; j < MAX_RANK; j++) size *= meta_->output_seg_dims[i][j];
+ } else {
+ int status;
+ status = getNPUmodel_tensorSize (dev_, model_id_, false, i, &size);
+ if (status != 0)
+ return 0;
+ }
return size;
}
int32_t getOutTensorQuantZero (gint i) {
memcpy (&in_buffers_.bufs[index], buffer->getGenericBuffer (),
sizeof (generic_buffer));
- in_buffers_.bufs[index].type = BUFFER_DMABUF;
return TRUE;
}
memcpy (&out_buffers_.bufs[index], buffer->getGenericBuffer (),
sizeof (generic_buffer));
- out_buffers_.bufs[index].type = BUFFER_DMABUF;
return TRUE;
}
for (guint rank = 0; rank < MAX_RANK; rank++) attr->strides[rank] = 1;
attr->plane = 3;
- attr->size = network->getInTensorSize (index);
- attr->fmt = NPUMGR_TENSOR_FMT_TRIV2;
+ attr->fmt = NPUMGR_TENSOR_FMT_NHWC;
attr->type = NPUMGR_TENSOR_DATA_UINT8;
+ attr->size = network->getInTensorSize (index, attr->fmt, attr->type);
attr->quant_type = NPUMGR_TENSOR_QNT_AFFINE_ASYMM;
attr->quant_data.affine.scale = network->getInTensorQuantScale (index);
for (guint rank = 0; rank < MAX_RANK; rank++) attr->strides[rank] = 1;
attr->plane = 3;
- attr->size = network->getOutTensorSize (index);
- attr->fmt = NPUMGR_TENSOR_FMT_TRIV2;
+ attr->fmt = NPUMGR_TENSOR_FMT_NHWC;
attr->type = NPUMGR_TENSOR_DATA_UINT8;
+ attr->size = network->getOutTensorSize (index, attr->fmt, attr->type);
attr->quant_type = NPUMGR_TENSOR_QNT_AFFINE_ASYMM;
attr->quant_data.affine.scale = network->getOutTensorQuantScale (index);