From 5565b73d71c0ace11bf0afbc890736aed2fea357 Mon Sep 17 00:00:00 2001 From: Dongju Chae Date: Fri, 11 Jun 2021 16:07:05 +0900 Subject: [PATCH] [NPUMGR/Dummy] Enable NHWC format handling This patch enables NHWC format handling. Signed-off-by: Dongju Chae --- src/core/ne-handler.cc | 87 +++++++++++++++-------------- src/core/ne-scheduler.cc | 3 +- src/core/ne-scheduler.h | 8 ++- tests/apptests/npumgr/dummy/npumgr_api.h | 1 - tests/apptests/npumgr/dummy/npumgr_triv2.cc | 67 +++++++++++++++------- tests/apptests/npumgr/npumgr_test.cc | 8 +-- 6 files changed, 106 insertions(+), 68 deletions(-) diff --git a/src/core/ne-handler.cc b/src/core/ne-handler.cc index f68b531..9cffcd9 100644 --- a/src/core/ne-handler.cc +++ b/src/core/ne-handler.cc @@ -275,7 +275,7 @@ HostHandler::getModel (uint32_t modelid) { /** @brief dummay callback for runSync. */ class callbackSync { public: - callbackSync (output_buffers *output) : output_ (output), done_ (false) {} + callbackSync () : done_ (false) {} static void callback (output_buffers *output, int req_id, void *data) { callbackSync *sync = static_cast (data); @@ -283,10 +283,6 @@ class callbackSync { } void callback (output_buffers *output, int req_id) { - if (output_ != nullptr && output != nullptr) { - /** just copy internal variables of output buffers */ - memcpy (output_, output, sizeof (output_buffers)); - } done_ = true; cv_.notify_one (); } @@ -299,7 +295,6 @@ class callbackSync { private: std::mutex m_; std::condition_variable cv_; - output_buffers *output_; bool done_; }; @@ -334,7 +329,7 @@ HostHandler::runModel (uint32_t modelid, npu_infer_mode mode, switch (mode) { case NPU_INFER_BLOCKING: { - callbackSync sync (output); + callbackSync sync; req_id = device_->run (NPUINPUT_HOST, model, input, output, callbackSync::callback, static_cast (&sync)); @@ -1002,6 +997,7 @@ TrinityVision2::run (npu_input_opmode opmode, const Model *model, Request *req = new Request (opmode); req->setModel (model); req->setInferData (segt); + req->setOutputBuffers (output); req->setCallback ( std::bind (&TrinityVision2::callback, this, req, cb, cb_data)); @@ -1046,57 +1042,66 @@ TrinityVision2::callback (Request *req, npuOutputNotify cb, void *cb_data) { /** internal logic error */ assert (segt != nullptr); - output_buffers output = {.num_buffers = segt->getNumOutputSegments ()}; + output_buffers *output = req->getOutputBuffers (); + /* user didn't provide valid output buffers. So, pass internally allocated one */ + if (output == nullptr) { + output = new output_buffers; + memset (output, '\x00', sizeof (output_buffers)); + } + if (output->num_buffers == 0) + output->num_buffers = segt->getNumOutputSegments (); - for (uint32_t idx = 0; idx < output.num_buffers; idx++) { + /* perform data manipulation such as layout conversion and de-quantization */ + for (uint32_t idx = 0; idx < output->num_buffers; idx++) { uint32_t output_tensor_size = model->getOutputTensorSize (idx); HWmem *output_segment = segt->getOutputSegment (idx); - if (output_segment->isExternal ()) { - output.bufs[idx].type = BUFFER_DMABUF; - output.bufs[idx].size = output_segment->getSize (); - output.bufs[idx].addr = output_segment->getData (); - output.bufs[idx].dmabuf = output_segment->getDmabuf (); - output.bufs[idx].offset = output_segment->getOffset (); - } else { - output.bufs[idx].type = BUFFER_MAPPED; - output.bufs[idx].size = output_tensor_size; + if (output->bufs[idx].type == BUFFER_DMABUF) { + /* it's external memory. can't do */ + continue; + } else if (output->bufs[idx].addr == nullptr || + output->bufs[idx].size == 0) { + output->bufs[idx].type = BUFFER_MAPPED; + output->bufs[idx].size = output_tensor_size; /** user needs to free this */ - output.bufs[idx].addr = calloc (1, output_tensor_size); - if (output.bufs[idx].addr == NULL) { + output->bufs[idx].addr = calloc (1, output_tensor_size); + if (output->bufs[idx].addr == NULL) { logerr (TAG, "Unable to allocate output buffer\n"); break; } + } - auto func = std::bind (TrinityVision2::manipulateData, model, idx, false, - std::placeholders::_1, std::placeholders::_2, - std::placeholders::_3); - void *dst; + auto func = std::bind (TrinityVision2::manipulateData, model, idx, false, + std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3); + void *dst; #ifdef __FPGA__ - /* this is fpga workaround codes for syncing output data */ - dst = calloc (1, output_tensor_size); - if (dst == NULL) { - logerr (TAG, "Unable to allocate FPGA temp buffer\n"); - break; - } - api_->fpga_memcpy (output_segment->getDmabuf (), - segt->getOutputSegmentOffset (idx), dst, - output_tensor_size); + /* this is fpga workaround codes for syncing output data */ + dst = calloc (1, output_tensor_size); + if (dst == NULL) { + logerr (TAG, "Unable to allocate FPGA temp buffer\n"); + break; + } + api_->fpga_memcpy (output_segment->getDmabuf (), + segt->getOutputSegmentOffset (idx), dst, + output_tensor_size); #else - dst = output_segment->getData () + segt->getOutputSegmentOffset (idx); + dst = output_segment->getData () + segt->getOutputSegmentOffset (idx); #endif - int status = comm_.insertGenericBuffer (dst, &output.bufs[idx], func); - if (status != 0) { - logerr (TAG, "Failed to return output buffer: %d\n", status); - } + int status = comm_.insertGenericBuffer (dst, &output->bufs[idx], func); + if (status != 0) { + logerr (TAG, "Failed to return output buffer: %d\n", status); + } + #ifdef __FPGA__ - free (dst); + free (dst); #endif - } } - cb (&output, req->getID (), cb_data); + cb (output, req->getID (), cb_data); + if (req->getOutputBuffers () != output) + delete output; delete segt; } diff --git a/src/core/ne-scheduler.cc b/src/core/ne-scheduler.cc index 8e3d21b..db3fd75 100644 --- a/src/core/ne-scheduler.cc +++ b/src/core/ne-scheduler.cc @@ -27,7 +27,8 @@ Request::Request (npu_input_opmode opmode) stopped_ (false), model_ (nullptr), data_ (nullptr), - cb_ (nullptr) { + cb_ (nullptr), + out_bufs_ (nullptr) { request_id_ = Request::global_request_id_.fetch_add (1); } diff --git a/src/core/ne-scheduler.h b/src/core/ne-scheduler.h index 0e58a39..2f6b3f4 100644 --- a/src/core/ne-scheduler.h +++ b/src/core/ne-scheduler.h @@ -53,6 +53,9 @@ class Request { void setHwDevice (std::string hw_dev) { hw_dev_ = hw_dev; } std::string getHwDevice () { return hw_dev_; } + void setOutputBuffers (output_buffers *out_bufs) { out_bufs_ = out_bufs; } + output_buffers *getOutputBuffers () { return out_bufs_; } + private: static std::atomic global_request_id_; int request_id_; /**< request id */ @@ -64,8 +67,9 @@ class Request { const Model *model_; /**< model of the request */ HWmem *data_; /**< inference data of the request */ - outputCallback cb_; /**< request callback */ - std::string hw_dev_; /**< HW device path */ + outputCallback cb_; /**< request callback */ + output_buffers *out_bufs_; /**< output buffers */ + std::string hw_dev_; /**< HW device path */ }; /** @brief class def. of scheduler to handle requests */ diff --git a/tests/apptests/npumgr/dummy/npumgr_api.h b/tests/apptests/npumgr/dummy/npumgr_api.h index 6fae8e7..c17dcf8 100644 --- a/tests/apptests/npumgr/dummy/npumgr_api.h +++ b/tests/apptests/npumgr/dummy/npumgr_api.h @@ -165,7 +165,6 @@ typedef enum _npumgr_tensor_qnt_type { typedef enum _npumgr_tensor_fmt { NPUMGR_TENSOR_FMT_NCHW = 0, NPUMGR_TENSOR_FMT_NHWC, - NPUMGR_TENSOR_FMT_TRIV2, NPUMGR_TENSOR_FMT_MAX }npumgr_tensor_fmt_t; diff --git a/tests/apptests/npumgr/dummy/npumgr_triv2.cc b/tests/apptests/npumgr/dummy/npumgr_triv2.cc index e8b473d..c6cb0cf 100644 --- a/tests/apptests/npumgr/dummy/npumgr_triv2.cc +++ b/tests/apptests/npumgr/dummy/npumgr_triv2.cc @@ -90,9 +90,6 @@ convert_layout (const npumgr_tensor_fmt_t &fmt) { case NPUMGR_TENSOR_FMT_NHWC: layout = DATA_LAYOUT_NHWC; break; - case NPUMGR_TENSOR_FMT_TRIV2: - layout = DATA_LAYOUT_TRIV2; - break; default: break; } @@ -133,6 +130,28 @@ convert_type (const npumgr_tensor_data_t &data, return type; } +static gsize +calc_data_size (const npumgr_tensor_data_t type) { + switch (type) { + case NPUMGR_TENSOR_DATA_FLOAT32: + return 4; + case NPUMGR_TENSOR_DATA_FLOAT16: + return 2; + case NPUMGR_TENSOR_DATA_INT8: + return 1; + case NPUMGR_TENSOR_DATA_UINT8: + return 1; + case NPUMGR_TENSOR_DATA_INT16: + return 2; + case NPUMGR_TENSOR_DATA_UINT16: + return 2; + case NPUMGR_TENSOR_DATA_UINT24: + return 3; + default: + return 0; + } +} + /** * @brief Class for triv2 npumgr network */ @@ -228,14 +247,20 @@ class NpumgrNetworkTriv2 { guint getInTensorDim (guint i, guint j) { return meta_->input_seg_dims[i][j]; } - guint getInTensorSize (guint i) { + guint getInTensorSize (guint i, npumgr_tensor_fmt_t fmt, + npumgr_tensor_data_t type) { guint size; - int status; - status = getNPUmodel_tensorSize (dev_, model_id_, true, i, &size); - if (status != 0) - return 0; + if (fmt == NPUMGR_TENSOR_FMT_NHWC || fmt == NPUMGR_TENSOR_FMT_NCHW) { + size = calc_data_size (type); + for (int j = 0; j < MAX_RANK; j++) size *= meta_->input_seg_dims[i][j]; + } else { + int status; + status = getNPUmodel_tensorSize (dev_, model_id_, true, i, &size); + if (status != 0) + return 0; + } return size; } int32_t getInTensorQuantZero (guint i) { return meta_->input_seg_quant_z[i]; } @@ -244,14 +269,20 @@ class NpumgrNetworkTriv2 { guint getOutTensorDim (gint i, gint j) { return meta_->output_seg_dims[i][j]; } - guint getOutTensorSize (guint i) { + guint getOutTensorSize (guint i, npumgr_tensor_fmt_t fmt, + npumgr_tensor_data_t type) { guint size; - int status; - status = getNPUmodel_tensorSize (dev_, model_id_, false, i, &size); - if (status != 0) - return 0; + if (fmt == NPUMGR_TENSOR_FMT_NHWC || fmt == NPUMGR_TENSOR_FMT_NCHW) { + size = calc_data_size (type); + for (int j = 0; j < MAX_RANK; j++) size *= meta_->output_seg_dims[i][j]; + } else { + int status; + status = getNPUmodel_tensorSize (dev_, model_id_, false, i, &size); + if (status != 0) + return 0; + } return size; } int32_t getOutTensorQuantZero (gint i) { @@ -269,7 +300,6 @@ class NpumgrNetworkTriv2 { memcpy (&in_buffers_.bufs[index], buffer->getGenericBuffer (), sizeof (generic_buffer)); - in_buffers_.bufs[index].type = BUFFER_DMABUF; return TRUE; } @@ -283,7 +313,6 @@ class NpumgrNetworkTriv2 { memcpy (&out_buffers_.bufs[index], buffer->getGenericBuffer (), sizeof (generic_buffer)); - out_buffers_.bufs[index].type = BUFFER_DMABUF; return TRUE; } @@ -1027,9 +1056,9 @@ triv2_query_input (NpumgrDevice *device, npumgr_context ctx_handle, for (guint rank = 0; rank < MAX_RANK; rank++) attr->strides[rank] = 1; attr->plane = 3; - attr->size = network->getInTensorSize (index); - attr->fmt = NPUMGR_TENSOR_FMT_TRIV2; + attr->fmt = NPUMGR_TENSOR_FMT_NHWC; attr->type = NPUMGR_TENSOR_DATA_UINT8; + attr->size = network->getInTensorSize (index, attr->fmt, attr->type); attr->quant_type = NPUMGR_TENSOR_QNT_AFFINE_ASYMM; attr->quant_data.affine.scale = network->getInTensorQuantScale (index); @@ -1067,9 +1096,9 @@ triv2_query_output (NpumgrDevice *device, npumgr_context ctx_handle, for (guint rank = 0; rank < MAX_RANK; rank++) attr->strides[rank] = 1; attr->plane = 3; - attr->size = network->getOutTensorSize (index); - attr->fmt = NPUMGR_TENSOR_FMT_TRIV2; + attr->fmt = NPUMGR_TENSOR_FMT_NHWC; attr->type = NPUMGR_TENSOR_DATA_UINT8; + attr->size = network->getOutTensorSize (index, attr->fmt, attr->type); attr->quant_type = NPUMGR_TENSOR_QNT_AFFINE_ASYMM; attr->quant_data.affine.scale = network->getOutTensorQuantScale (index); diff --git a/tests/apptests/npumgr/npumgr_test.cc b/tests/apptests/npumgr/npumgr_test.cc index 8b7ddbd..9f8f670 100644 --- a/tests/apptests/npumgr/npumgr_test.cc +++ b/tests/apptests/npumgr/npumgr_test.cc @@ -105,8 +105,8 @@ start_npumgr_test (int fd, const string &dir) { goto destroy_all; } - /** TODO: currently, support TRIV2 (NHWC-based) format */ - if (attr.fmt != NPUMGR_TENSOR_FMT_TRIV2) { + /** TODO: currently, support NHWC format only */ + if (attr.fmt != NPUMGR_TENSOR_FMT_NHWC) { cerr << "Other format is not supported yet, " << attr.fmt << "\n"; fclose (f); goto destroy_all; @@ -160,8 +160,8 @@ start_npumgr_test (int fd, const string &dir) { goto destroy_all; } - /** TODO: currently, support TRIV2 (NHWC-based) format */ - if (attr.fmt != NPUMGR_TENSOR_FMT_TRIV2) { + /** TODO: currently, support NHWC format only */ + if (attr.fmt != NPUMGR_TENSOR_FMT_NHWC) { cerr << "Other format is not supported yet, " << attr.fmt << "\n"; goto destroy_all; } -- 2.7.4