/**
 * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "mv_private.h"
#include "Inference.h"
#include "InferenceIni.h"

#include <unistd.h>
#include <fstream>
#include <string>
#include <queue>
#include <algorithm>

#define MV_INFERENCE_OUTPUT_NUMBERS_MAX 10
#define MV_INFERENCE_OUTPUT_NUMBERS_MIN 1
#define MV_INFERENCE_CONFIDENCE_THRESHOLD_MAX 1.0
#define MV_INFERENCE_CONFIDENCE_THRESHOLD_MIN 0.0
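// The bounds above clamp user-supplied values in ConfigureOutput() and
// ConfigureThreshold(): the maximum result count is kept in [1, 10] and the
// confidence threshold in [0.0, 1.0].
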
InferenceConfig::InferenceConfig() :
        mConfigFilePath(),
        mWeightFilePath(),
        mUserFilePath(),
        mDataType(MV_INFERENCE_DATA_FLOAT32),
        mBackedType(MV_INFERENCE_BACKEND_NONE),
        mTargetTypes(MV_INFERENCE_TARGET_NONE),
        mConfidenceThresHold(),
        mMeanValue(),
        mStdValue(),
        mMaxOutputNumbers(1)
{
    mTensorInfo.width = -1;
    mTensorInfo.height = -1;
    mTensorInfo.dim = -1;
    mTensorInfo.ch = -1;
}

Inference::Inference() :
        mSupportedInferenceBackend(),
        mInputSize(cv::Size()),
        mSourceSize(cv::Size()),
        mInputBuffer(cv::Mat())
{
    mSupportedInferenceBackend.insert(std::make_pair(
            MV_INFERENCE_BACKEND_OPENCV, std::make_pair("opencv", false)));
    mSupportedInferenceBackend.insert(std::make_pair(
            MV_INFERENCE_BACKEND_TFLITE, std::make_pair("tflite", false)));
    mSupportedInferenceBackend.insert(std::make_pair(
            MV_INFERENCE_BACKEND_ARMNN, std::make_pair("armnn", false)));
    mSupportedInferenceBackend.insert(std::make_pair(
            MV_INFERENCE_BACKEND_MLAPI, std::make_pair("mlapi", false)));
    mSupportedInferenceBackend.insert(std::make_pair(
            MV_INFERENCE_BACKEND_NNFW, std::make_pair("mlapi", false)));
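    // NOTE: MV_INFERENCE_BACKEND_NNFW sharing the "mlapi" library name is
    // intentional, not a copy-paste slip: the NNFW runtime is expected to be
    // reached through the ML API single interface, so both backend types
    // bind the same plugin.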

    CheckSupportedInferenceBackend();

    for (int i = 0; i < MV_INFERENCE_BACKEND_MAX; ++i) {
        auto iter = mSupportedInferenceBackend.find(i);
        if (iter == mSupportedInferenceBackend.end())
            continue;

        LOGI("%d: %s: %s", i, (iter->second).first.c_str(),
             (iter->second).second ? "TRUE" : "FALSE");
    }

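    // Map a model weight-file extension to its model format; Load() uses
    // this table to decide how many files to hand to the backend.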
    mModelFormats.insert(std::make_pair<std::string, int>(
            "caffemodel", INFERENCE_MODEL_CAFFE));
    mModelFormats.insert(
            std::make_pair<std::string, int>("pb", INFERENCE_MODEL_TF));
    mModelFormats.insert(std::make_pair<std::string, int>(
            "tflite", INFERENCE_MODEL_TFLITE));
    mModelFormats.insert(
            std::make_pair<std::string, int>("t7", INFERENCE_MODEL_TORCH));
    mModelFormats.insert(std::make_pair<std::string, int>(
            "weights", INFERENCE_MODEL_DARKNET));
    mModelFormats.insert(
            std::make_pair<std::string, int>("bin", INFERENCE_MODEL_DLDT));
    mModelFormats.insert(
            std::make_pair<std::string, int>("onnx", INFERENCE_MODEL_ONNX));
    mModelFormats.insert(std::make_pair<std::string, int>(
            "nb", INFERENCE_MODEL_VIVANTE));
}

Inference::~Inference()
{
    CleanupTensorBuffers();

    if (!mInputLayerProperty.tensor_infos.empty()) {
        mInputLayerProperty.tensor_infos.clear();
        std::vector<inference_engine_tensor_info>().swap(
                mInputLayerProperty.tensor_infos);
    }
    if (!mOutputLayerProperty.tensor_infos.empty()) {
        mOutputLayerProperty.tensor_infos.clear();
        std::vector<inference_engine_tensor_info>().swap(
                mOutputLayerProperty.tensor_infos);
    }

    mModelFormats.clear();

    // Release backend engine. mBackend is only created in Bind(), so guard
    // against a handle that was never bound.
    if (mBackend) {
        mBackend->UnbindBackend();
        delete mBackend;
    }

    LOGI("Released backend engine.");
}

void Inference::CheckSupportedInferenceBackend()
{
    InferenceInI ini;
    ini.LoadInI();

    std::vector<int> supportedBackend = ini.GetSupportedInferenceEngines();
    for (std::vector<int>::const_iterator it = supportedBackend.begin();
         it != supportedBackend.end(); ++it) {
        LOGI("supported engine: %d", *it);

        auto iter = mSupportedInferenceBackend.find(*it);
        if (iter != mSupportedInferenceBackend.end())
            (iter->second).second = true;
    }
}

int Inference::ConvertEngineErrorToVisionError(int error)
{
    int ret = MEDIA_VISION_ERROR_NONE;

    switch (error) {
    case INFERENCE_ENGINE_ERROR_NONE:
        ret = MEDIA_VISION_ERROR_NONE;
        break;
    case INFERENCE_ENGINE_ERROR_NOT_SUPPORTED:
        ret = MEDIA_VISION_ERROR_NOT_SUPPORTED;
        break;
    case INFERENCE_ENGINE_ERROR_MSG_TOO_LONG:
        ret = MEDIA_VISION_ERROR_MSG_TOO_LONG;
        break;
    case INFERENCE_ENGINE_ERROR_NO_DATA:
        ret = MEDIA_VISION_ERROR_NO_DATA;
        break;
    case INFERENCE_ENGINE_ERROR_KEY_NOT_AVAILABLE:
        ret = MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE;
        break;
    case INFERENCE_ENGINE_ERROR_OUT_OF_MEMORY:
        ret = MEDIA_VISION_ERROR_OUT_OF_MEMORY;
        break;
    case INFERENCE_ENGINE_ERROR_INVALID_PARAMETER:
        ret = MEDIA_VISION_ERROR_INVALID_PARAMETER;
        break;
    case INFERENCE_ENGINE_ERROR_INVALID_OPERATION:
        ret = MEDIA_VISION_ERROR_INVALID_OPERATION;
        break;
    case INFERENCE_ENGINE_ERROR_PERMISSION_DENIED:
        ret = MEDIA_VISION_ERROR_PERMISSION_DENIED;
        break;
    case INFERENCE_ENGINE_ERROR_NOT_SUPPORTED_FORMAT:
        ret = MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
        break;
    case INFERENCE_ENGINE_ERROR_INTERNAL:
        ret = MEDIA_VISION_ERROR_INTERNAL;
        break;
    case INFERENCE_ENGINE_ERROR_INVALID_DATA:
        ret = MEDIA_VISION_ERROR_INVALID_DATA;
        break;
    case INFERENCE_ENGINE_ERROR_INVALID_PATH:
        ret = MEDIA_VISION_ERROR_INVALID_PATH;
        break;
    default:
        LOGE("Unknown inference engine error type");
        break;
    }

    return ret;
}

int Inference::ConvertTargetTypes(int given_types)
{
    int target_types = INFERENCE_TARGET_NONE;

    if (given_types & MV_INFERENCE_TARGET_DEVICE_CPU)
        target_types |= INFERENCE_TARGET_CPU;
    if (given_types & MV_INFERENCE_TARGET_DEVICE_GPU)
        target_types |= INFERENCE_TARGET_GPU;
    if (given_types & MV_INFERENCE_TARGET_DEVICE_CUSTOM)
        target_types |= INFERENCE_TARGET_CUSTOM;

    return target_types;
}

int Inference::ConvertToCv(int given_type)
{
    int type = 0;

    switch (given_type) {
    case INFERENCE_TENSOR_DATA_TYPE_UINT8:
        LOGI("Type is %d ch with UINT8", mCh);
        type = mCh == 1 ? CV_8UC1 : CV_8UC3;
        break;
    case INFERENCE_TENSOR_DATA_TYPE_FLOAT32:
        LOGI("Type is %d ch with FLOAT32", mCh);
        type = mCh == 1 ? CV_32FC1 : CV_32FC3;
        break;
    default:
        LOGI("Unknown data type, so FLOAT32 will be used by default.");
        type = mCh == 1 ? CV_32FC1 : CV_32FC3;
        break;
    }

    return type;
}

inference_tensor_data_type_e Inference::ConvertToIE(int given_type)
{
    inference_tensor_data_type_e type = INFERENCE_TENSOR_DATA_TYPE_FLOAT32;

    switch (given_type) {
    case MV_INFERENCE_DATA_FLOAT32:
        type = INFERENCE_TENSOR_DATA_TYPE_FLOAT32;
        break;
    case MV_INFERENCE_DATA_UINT8:
        type = INFERENCE_TENSOR_DATA_TYPE_UINT8;
        break;
    default:
        LOGI("Unknown data type, so FLOAT32 will be used by default.");
        break;
    }

    return type;
}

int Inference::Preprocess(cv::Mat cvImg, cv::Mat cvDst, int data_type)
{
    mSourceSize = cvImg.size();
    int width = mInputSize.width;
    int height = mInputSize.height;

    cv::Mat sample;
    if (cvImg.channels() == 3 && mCh == 1)
        cv::cvtColor(cvImg, sample, cv::COLOR_BGR2GRAY);
    else
        sample = cvImg;

    // Resize to the model input size if needed.
    cv::Mat sampleResized;
    if (sample.size() != cv::Size(width, height))
        cv::resize(sample, sampleResized, cv::Size(width, height));
    else
        sampleResized = sample;

    // Convert to float for normalization.
    cv::Mat sampleFloat;
    if (mCh == 3)
        sampleResized.convertTo(sampleFloat, CV_32FC3);
    else
        sampleResized.convertTo(sampleFloat, CV_32FC1);

    // Subtract the mean and divide by the standard deviation.
    cv::Mat sampleNormalized;
    cv::Mat meanMat;
    if (mCh == 3)
        meanMat = cv::Mat(sampleFloat.size(), CV_32FC3,
                          cv::Scalar((float) mMean, (float) mMean,
                                     (float) mMean));
    else
        meanMat = cv::Mat(sampleFloat.size(), CV_32FC1,
                          cv::Scalar((float) mMean));

    cv::subtract(sampleFloat, meanMat, sampleNormalized);
    sampleNormalized /= static_cast<float>(mDeviation);

    sampleNormalized.convertTo(cvDst, data_type);

    return MEDIA_VISION_ERROR_NONE;
}

int Inference::SetUserFile(std::string filename)
{
    std::ifstream fp(filename.c_str());
    if (!fp.is_open())
        return MEDIA_VISION_ERROR_INVALID_PATH;

    std::string userListName;
    while (std::getline(fp, userListName)) {
        if (userListName.length())
            mUserListName.push_back(userListName);
    }

    fp.close();

    return MEDIA_VISION_ERROR_NONE;
}

void Inference::ConfigureModelFiles(const std::string modelConfigFilePath,
                                    const std::string modelWeightFilePath,
                                    const std::string modelUserFilePath)
{
    mConfig.mConfigFilePath = modelConfigFilePath;
    mConfig.mWeightFilePath = modelWeightFilePath;
    mConfig.mUserFilePath = modelUserFilePath;
}

void Inference::ConfigureTensorInfo(int width, int height, int dim, int ch,
                                    double stdValue, double meanValue)
{
    mConfig.mTensorInfo = { width, height, dim, ch };
    mConfig.mStdValue = stdValue;
    mConfig.mMeanValue = meanValue;
}

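// ConfigureInputInfo() records the user-provided input tensor description
// and pushes it to the backend. The shape is kept in NCHW order, and the
// stored size is an element count (dim * ch * height * width), not a byte
// count.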
void Inference::ConfigureInputInfo(int width, int height, int dim, int ch,
                                   double stdValue, double meanValue,
                                   int dataType,
                                   const std::vector<std::string> names)
{
    mConfig.mTensorInfo = { width, height, dim, ch };
    mConfig.mStdValue = stdValue;
    mConfig.mMeanValue = meanValue;
    mConfig.mDataType = static_cast<mv_inference_data_type_e>(dataType);
    mConfig.mInputLayerNames = names;

    inference_engine_layer_property property;
    // If an inference plugin doesn't support querying properties, the tensor
    // info given by the user is used. If the plugin does support it, the
    // given info is ignored.
    inference_engine_tensor_info tensor_info;
    tensor_info.data_type = ConvertToIE(dataType);

    // In case of OpenCV, only NCHW is supported.
    tensor_info.shape_type = INFERENCE_TENSOR_SHAPE_NCHW;
    // TODO: modify to handle multiple tensor infos.
    tensor_info.shape.push_back(mConfig.mTensorInfo.dim);
    tensor_info.shape.push_back(mConfig.mTensorInfo.ch);
    tensor_info.shape.push_back(mConfig.mTensorInfo.height);
    tensor_info.shape.push_back(mConfig.mTensorInfo.width);

    tensor_info.size = 1;
    for (std::vector<size_t>::iterator iter = tensor_info.shape.begin();
         iter != tensor_info.shape.end(); ++iter) {
        tensor_info.size *= (*iter);
    }

    property.layer_names = mConfig.mInputLayerNames;
    property.tensor_infos.push_back(tensor_info);

    int ret = mBackend->SetInputLayerProperty(property);
    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
        LOGE("Fail to set input layer property");
    }
}

void Inference::ConfigureOutputInfo(const std::vector<std::string> names)
{
    mConfig.mOutputLayerNames = names;

    inference_engine_layer_property property;
    property.layer_names = names;

    int ret = mBackend->SetOutputLayerProperty(property);
    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
        LOGE("Fail to set output layer property");
    }
}

int Inference::ConfigureBackendType(
        const mv_inference_backend_type_e backendType)
{
    std::pair<std::string, bool> backend =
            mSupportedInferenceBackend[backendType];
    if (backend.second == false) {
        LOGE("%s type is not supported", (backend.first).c_str());
        return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
    }

    LOGI("backend engine : %d", backendType);

    mConfig.mBackedType = backendType;

    return MEDIA_VISION_ERROR_NONE;
}

int Inference::ConfigureTargetTypes(const int targetType)
{
    // Check if the given target type is valid or not.
    if (MV_INFERENCE_TARGET_NONE >= targetType ||
        MV_INFERENCE_TARGET_MAX <= targetType) {
        LOGE("Invalid target device.");
        return MEDIA_VISION_ERROR_INVALID_PARAMETER;
    }

    LOGI("Before converting target types : %d", targetType);

    unsigned int new_type = MV_INFERENCE_TARGET_DEVICE_NONE;

    // Convert the old type to the new one.
    switch (targetType) {
    case MV_INFERENCE_TARGET_CPU:
        new_type = MV_INFERENCE_TARGET_DEVICE_CPU;
        break;
    case MV_INFERENCE_TARGET_GPU:
        new_type = MV_INFERENCE_TARGET_DEVICE_GPU;
        break;
    case MV_INFERENCE_TARGET_CUSTOM:
        new_type = MV_INFERENCE_TARGET_DEVICE_CUSTOM;
        break;
    }

    LOGI("After converting target types : %d", new_type);

    mConfig.mTargetTypes = new_type;

    return MEDIA_VISION_ERROR_NONE;
}

int Inference::ConfigureTargetDevices(const int targetDevices)
{
    // Check if the given target devices are valid or not.
    if (MV_INFERENCE_TARGET_DEVICE_NONE >= targetDevices ||
        MV_INFERENCE_TARGET_DEVICE_MAX <= targetDevices) {
        LOGE("Invalid target device.");
        return MEDIA_VISION_ERROR_INVALID_PARAMETER;
    }

    LOGI("target devices : %d", targetDevices);

    mConfig.mTargetTypes = targetDevices;

    return MEDIA_VISION_ERROR_NONE;
}

void Inference::ConfigureOutput(const int maxOutputNumbers)
{
    mConfig.mMaxOutputNumbers = std::max(
            std::min(maxOutputNumbers, MV_INFERENCE_OUTPUT_NUMBERS_MAX),
            MV_INFERENCE_OUTPUT_NUMBERS_MIN);
}

void Inference::ConfigureThreshold(const double threshold)
{
    mConfig.mConfidenceThresHold = std::max(
            std::min(threshold, MV_INFERENCE_CONFIDENCE_THRESHOLD_MAX),
            MV_INFERENCE_CONFIDENCE_THRESHOLD_MIN);
}

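// Tensor-buffer ownership model: buffers handed out by the backend
// (owner_is_backend set) are freed by the backend itself; buffers allocated
// in PrepareTenosrBuffers() below are owned by this class and released here.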
void Inference::CleanupTensorBuffers(void)
{
    if (!mInputTensorBuffers.empty()) {
        std::vector<inference_engine_tensor_buffer>::iterator iter;
        for (iter = mInputTensorBuffers.begin();
             iter != mInputTensorBuffers.end(); iter++) {
            inference_engine_tensor_buffer tensor_buffer = *iter;

            // If the tensor buffer owner is a backend, skip releasing it.
            // The backend will release this tensor buffer itself.
            if (tensor_buffer.owner_is_backend)
                continue;

            if (tensor_buffer.data_type ==
                INFERENCE_TENSOR_DATA_TYPE_FLOAT32)
                delete[] static_cast<float *>(tensor_buffer.buffer);
            else
                delete[] static_cast<unsigned char *>(tensor_buffer.buffer);
        }

        LOGI("input tensor buffers(%zu) have been released.",
             mInputTensorBuffers.size());
        std::vector<inference_engine_tensor_buffer>().swap(
                mInputTensorBuffers);
    }

    if (!mOutputTensorBuffers.empty()) {
        std::vector<inference_engine_tensor_buffer>::iterator iter;
        for (iter = mOutputTensorBuffers.begin();
             iter != mOutputTensorBuffers.end(); iter++) {
            inference_engine_tensor_buffer tensor_buffer = *iter;

            // If the tensor buffer owner is a backend, skip releasing it.
            // The backend will release this tensor buffer itself.
            if (tensor_buffer.owner_is_backend)
                continue;

            if (tensor_buffer.data_type ==
                INFERENCE_TENSOR_DATA_TYPE_FLOAT32)
                delete[] static_cast<float *>(tensor_buffer.buffer);
            else
                delete[] static_cast<unsigned char *>(tensor_buffer.buffer);
        }

        LOGI("output tensor buffers(%zu) have been released.",
             mOutputTensorBuffers.size());
        std::vector<inference_engine_tensor_buffer>().swap(
                mOutputTensorBuffers);
    }
}

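// PrepareTenosrBuffers() first asks the backend for its own tensor buffers;
// only if the backend returns none does it allocate buffers here, sized per
// data type: FLOAT32 as size * sizeof(float) bytes, FLOAT16 as
// size * sizeof(short) bytes, and UINT8 as size bytes, where size is the
// element count reported in the layer property.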
int Inference::PrepareTenosrBuffers(void)
{
    // If input and output tensor buffers were allocated before, release them.
    // They will be allocated again according to the new model file to be
    // loaded.
    CleanupTensorBuffers();

    // If a model file is loaded again, the model type could be different, so
    // clean up the input and output layer properties so that they can be
    // updated again after reloading the model file.
    if (!mInputLayerProperty.tensor_infos.empty()) {
        mInputLayerProperty.tensor_infos.clear();
        std::vector<inference_engine_tensor_info>().swap(
                mInputLayerProperty.tensor_infos);
    }
    if (!mOutputLayerProperty.tensor_infos.empty()) {
        mOutputLayerProperty.tensor_infos.clear();
        std::vector<inference_engine_tensor_info>().swap(
                mOutputLayerProperty.tensor_infos);
    }

    // Get input tensor buffers from the backend engine if it allocated them.
    int ret = mBackend->GetInputTensorBuffers(mInputTensorBuffers);
    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
        LOGE("Fail to get input tensor buffers from backend engine.");
        return ConvertEngineErrorToVisionError(ret);
    }

    ret = mBackend->GetInputLayerProperty(mInputLayerProperty);
    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
        LOGE("Fail to get input layer property from backend engine.");
        return ConvertEngineErrorToVisionError(ret);
    }

    // If the backend engine isn't able to allocate input tensor buffers
    // internally, then allocate the buffers here.
    if (mInputTensorBuffers.empty()) {
        for (size_t i = 0; i < mInputLayerProperty.tensor_infos.size(); ++i) {
            inference_engine_tensor_info tensor_info =
                    mInputLayerProperty.tensor_infos[i];
            inference_engine_tensor_buffer tensor_buffer;
            if (tensor_info.data_type ==
                INFERENCE_TENSOR_DATA_TYPE_FLOAT32) {
                tensor_buffer.buffer = new float[tensor_info.size];
                tensor_buffer.size = tensor_info.size * sizeof(float);
            } else if (tensor_info.data_type ==
                       INFERENCE_TENSOR_DATA_TYPE_UINT8) {
                tensor_buffer.buffer = new unsigned char[tensor_info.size];
                tensor_buffer.size = tensor_info.size;
            } else if (tensor_info.data_type ==
                       INFERENCE_TENSOR_DATA_TYPE_FLOAT16) {
                tensor_buffer.buffer = new short[tensor_info.size];
                tensor_buffer.size = tensor_info.size * sizeof(short);
            } else {
                LOGE("Invalid input tensor data type.");
                return MEDIA_VISION_ERROR_INVALID_PARAMETER;
            }

            if (tensor_buffer.buffer == NULL) {
                LOGE("Fail to allocate input tensor buffer.");
                return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
            }

            LOGI("Allocated input tensor buffer(size = %zu, data type = %d)",
                 tensor_info.size, tensor_info.data_type);
            tensor_buffer.owner_is_backend = 0;
            tensor_buffer.data_type = tensor_info.data_type;
            mInputTensorBuffers.push_back(tensor_buffer);
        }
    }

    LOGI("Input tensor buffer count is %zu", mInputTensorBuffers.size());

    // Get output tensor buffers from the backend engine if it allocated them.
    ret = mBackend->GetOutputTensorBuffers(mOutputTensorBuffers);
    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
        LOGE("Fail to get output tensor buffers from backend engine.");
        return ConvertEngineErrorToVisionError(ret);
    }

    ret = mBackend->GetOutputLayerProperty(mOutputLayerProperty);
    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
        LOGE("Fail to get output layer property from backend engine.");
        return ConvertEngineErrorToVisionError(ret);
    }

    // If the backend engine isn't able to allocate output tensor buffers
    // internally, then allocate the buffers here.
    if (mOutputTensorBuffers.empty()) {
        for (size_t i = 0; i < mOutputLayerProperty.tensor_infos.size();
             ++i) {
            inference_engine_tensor_info tensor_info =
                    mOutputLayerProperty.tensor_infos[i];
            inference_engine_tensor_buffer tensor_buffer;
            if (tensor_info.data_type ==
                INFERENCE_TENSOR_DATA_TYPE_FLOAT32) {
                tensor_buffer.buffer = new float[tensor_info.size];
                tensor_buffer.size = tensor_info.size * sizeof(float);
            } else if (tensor_info.data_type ==
                       INFERENCE_TENSOR_DATA_TYPE_UINT8) {
                tensor_buffer.buffer = new unsigned char[tensor_info.size];
                tensor_buffer.size = tensor_info.size;
            } else if (tensor_info.data_type ==
                       INFERENCE_TENSOR_DATA_TYPE_FLOAT16) {
                tensor_buffer.buffer = new short[tensor_info.size];
                tensor_buffer.size = tensor_info.size * sizeof(short);
            } else {
                LOGE("Invalid output tensor data type.");
                CleanupTensorBuffers();
                return MEDIA_VISION_ERROR_INVALID_PARAMETER;
            }

            if (tensor_buffer.buffer == NULL) {
                LOGE("Fail to allocate output tensor buffer.");
                CleanupTensorBuffers();
                return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
            }

            LOGI("Allocated output tensor buffer(size = %zu, data type = %d)",
                 tensor_info.size, tensor_info.data_type);
            tensor_buffer.owner_is_backend = 0;
            tensor_buffer.data_type = tensor_info.data_type;
            mOutputTensorBuffers.push_back(tensor_buffer);
        }
    }

    LOGI("Output tensor buffer count is %zu", mOutputTensorBuffers.size());

    return MEDIA_VISION_ERROR_NONE;
}

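// FillOutputResult() exposes every output tensor to the result parsers as a
// float buffer. Quantized UINT8 outputs are dequantized on the fly by
// dividing each value by 255.0f (assuming a [0, 255] -> [0, 1] range), and
// FLOAT16 outputs are widened to 32-bit floats.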
int Inference::FillOutputResult(tensor_t &outputData)
{
    for (size_t i = 0; i < mOutputLayerProperty.tensor_infos.size(); ++i) {
        inference_engine_tensor_info tensor_info =
                mOutputLayerProperty.tensor_infos[i];

        std::vector<int> tmpDimInfo;
        for (int j = 0; j < static_cast<int>(tensor_info.shape.size());
             ++j) {
            tmpDimInfo.push_back(tensor_info.shape[j]);
        }

        outputData.dimInfo.push_back(tmpDimInfo);

        // Normalize the output tensor data, converting it to float type in
        // case of a quantized model.
        if (tensor_info.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT8) {
            float *new_buf = new float[tensor_info.size];
            if (new_buf == NULL) {
                LOGE("Fail to allocate a new output tensor buffer.");
                return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
            }

            unsigned char *ori_buf = static_cast<unsigned char *>(
                    mOutputTensorBuffers[i].buffer);

            for (size_t j = 0; j < tensor_info.size; j++) {
                new_buf[j] = static_cast<float>(ori_buf[j]) / 255.0f;
            }

            // Replace the original buffer with the new one, and release the
            // original one.
            mOutputTensorBuffers[i].buffer = new_buf;

            if (!mOutputTensorBuffers[i].owner_is_backend)
                delete[] ori_buf;
        }

        if (tensor_info.data_type == INFERENCE_TENSOR_DATA_TYPE_FLOAT16) {
            float *new_buf = new float[tensor_info.size];
            if (new_buf == NULL) {
                LOGE("Fail to allocate a new output tensor buffer.");
                return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
            }

            short *ori_buf =
                    static_cast<short *>(mOutputTensorBuffers[i].buffer);

            // NOTE: this widens each 16-bit value numerically; a bit-level
            // half-to-float conversion would be required for true IEEE FP16.
            for (size_t j = 0; j < tensor_info.size; j++) {
                new_buf[j] = static_cast<float>(ori_buf[j]);
            }

            // Replace the original buffer with the new one, and release the
            // original one.
            mOutputTensorBuffers[i].buffer = new_buf;

            if (!mOutputTensorBuffers[i].owner_is_backend)
                delete[] ori_buf;
        }

        outputData.data.push_back(
                static_cast<void *>(mOutputTensorBuffers[i].buffer));
    }

    return MEDIA_VISION_ERROR_NONE;
}

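// Bind() resolves the configured backend type to its plugin library name,
// instantiates the common engine wrapper, binds the plugin, and caches the
// backend's capability (supported accelerator devices) for Prepare().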
int Inference::Bind(void)
{
    if (mConfig.mBackedType <= MV_INFERENCE_BACKEND_NONE ||
        mConfig.mBackedType >= MV_INFERENCE_BACKEND_MAX) {
        LOGE("NOT SUPPORTED BACKEND %d", mConfig.mBackedType);
        return MEDIA_VISION_ERROR_INVALID_OPERATION;
    }

    auto iter = mSupportedInferenceBackend.find(mConfig.mBackedType);
    std::string backendName = (iter->second).first;
    LOGI("backend string name: %s", backendName.c_str());

    inference_engine_config config = {
        .backend_name = backendName,
        .backend_type = mConfig.mBackedType,
        // By default, the target device is CPU. If the user defined a
        // desired device type in the json file, the device type will be set
        // by the Load callback.
        .target_devices = mConfig.mTargetTypes,
    };

    // Create a backend class object.
    try {
        mBackend = new InferenceEngineCommon();
    } catch (const std::bad_alloc &ex) {
        LOGE("Fail to create backend : %s", ex.what());
        return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
    }

    // Bind a backend library.
    int ret = mBackend->BindBackend(&config);
    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
        LOGE("Fail to bind backend library.(%d)", mConfig.mBackedType);
        return MEDIA_VISION_ERROR_INVALID_OPERATION;
    }

    // Get capacity information from the backend.
    ret = mBackend->GetBackendCapacity(&mBackendCapacity);
    if (ret != MEDIA_VISION_ERROR_NONE) {
        LOGE("Fail to get backend capacity.");
        return ret;
    }

    return MEDIA_VISION_ERROR_NONE;
}

int Inference::Prepare(void)
{
    mCh = mConfig.mTensorInfo.ch;
    mDim = mConfig.mTensorInfo.dim;
    mInputSize =
            cv::Size(mConfig.mTensorInfo.width, mConfig.mTensorInfo.height);
    LOGI("InputSize is %d x %d\n", mInputSize.width, mInputSize.height);

    mDeviation = mConfig.mStdValue;
    mMean = mConfig.mMeanValue;
    LOGI("mean %.4f, deviation %.4f", mMean, mDeviation);

    mOutputNumbers = mConfig.mMaxOutputNumbers;
    LOGI("outputNumber %d", mOutputNumbers);

    mThreshold = mConfig.mConfidenceThresHold;
    LOGI("threshold %.4f", mThreshold);

    // Check if the backend supports the given target device(s) or not.
    if (mConfig.mTargetTypes & MV_INFERENCE_TARGET_DEVICE_CPU) {
        if (!(mBackendCapacity.supported_accel_devices &
              INFERENCE_TARGET_CPU)) {
            LOGE("Backend doesn't support CPU device as an accelerator.");
            return MEDIA_VISION_ERROR_INVALID_PARAMETER;
        }
    }

    if (mConfig.mTargetTypes & MV_INFERENCE_TARGET_DEVICE_GPU) {
        if (!(mBackendCapacity.supported_accel_devices &
              INFERENCE_TARGET_GPU)) {
            LOGE("Backend doesn't support GPU device as an accelerator.");
            return MEDIA_VISION_ERROR_INVALID_PARAMETER;
        }
    }

    if (mConfig.mTargetTypes & MV_INFERENCE_TARGET_DEVICE_CUSTOM) {
        if (!(mBackendCapacity.supported_accel_devices &
              INFERENCE_TARGET_CUSTOM)) {
            LOGE("Backend doesn't support custom device as an accelerator.");
            return MEDIA_VISION_ERROR_INVALID_PARAMETER;
        }
    }

    mBackend->SetTargetDevices(ConvertTargetTypes(mConfig.mTargetTypes));

    return MEDIA_VISION_ERROR_NONE;
}

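// Load() validates the optional label file and the model file extension,
// builds the file list the detected model format expects (weights only, or
// weights plus a config file), and hands it to the backend for loading.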
int Inference::Load(void)
{
    std::string label_file = mConfig.mUserFilePath;
    size_t userFileLength = label_file.length();
    if (userFileLength > 0 && access(label_file.c_str(), F_OK)) {
        LOGE("Label file path [%s] is invalid", label_file.c_str());
        return MEDIA_VISION_ERROR_INVALID_PARAMETER;
    }

    int ret = (userFileLength > 0) ? SetUserFile(label_file) :
                                     MEDIA_VISION_ERROR_NONE;
    if (ret != MEDIA_VISION_ERROR_NONE) {
        LOGE("Fail to load label file.");
        return ret;
    }

    // Check if the model file is valid or not.
    std::string ext_str = mConfig.mWeightFilePath.substr(
            mConfig.mWeightFilePath.find_last_of(".") + 1);
    std::map<std::string, int>::iterator key = mModelFormats.find(ext_str);
    if (key == mModelFormats.end()) {
        LOGE("Invalid model file format.(ext = %s)", ext_str.c_str());
        return MEDIA_VISION_ERROR_INVALID_PARAMETER;
    }

    LOGI("%s model file has been detected.", ext_str.c_str());

    std::vector<std::string> models;

    inference_model_format_e model_format =
            static_cast<inference_model_format_e>(key->second);

    // Push model file information to the models vector according to the
    // detected model format.
    switch (model_format) {
    case INFERENCE_MODEL_CAFFE:
    case INFERENCE_MODEL_TF:
    case INFERENCE_MODEL_DARKNET:
    case INFERENCE_MODEL_DLDT:
    case INFERENCE_MODEL_ONNX:
    case INFERENCE_MODEL_VIVANTE:
        models.push_back(mConfig.mWeightFilePath);
        models.push_back(mConfig.mConfigFilePath);
        break;
    case INFERENCE_MODEL_TFLITE:
    case INFERENCE_MODEL_TORCH:
        models.push_back(mConfig.mWeightFilePath);
        break;
    default:
        break;
    }

    // Request model loading to the backend engine.
    ret = mBackend->Load(models, model_format);
    if (ret != INFERENCE_ENGINE_ERROR_NONE) {
        mCanRun = false;
        LOGE("Fail to load model");
        std::vector<std::string>().swap(models);
        return ConvertEngineErrorToVisionError(ret);
    }

    std::vector<std::string>().swap(models);

    // Prepare input and output tensor buffers.
    ret = PrepareTenosrBuffers();
    if (ret != MEDIA_VISION_ERROR_NONE) {
        mCanRun = false;
        return ret;
    }

    mCanRun = true;

    return ConvertEngineErrorToVisionError(ret);
}

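// Run() currently accepts exactly one RGB888 source (optionally cropped to a
// caller-supplied ROI), preprocesses it straight into each input tensor
// buffer, and then lets the backend execute the model.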
int Inference::Run(std::vector<mv_source_h> &mvSources,
                   std::vector<mv_rectangle_s> &rects)
{
    int ret = INFERENCE_ENGINE_ERROR_NONE;

    // mCanRun is set by Load() once the model and tensor buffers are ready.
    if (!mCanRun) {
        LOGE("Invalid to run inference");
        return MEDIA_VISION_ERROR_INVALID_OPERATION;
    }

    /* convert mv_source to cv::Mat */
    cv::Mat cvSource;
    cv::Rect cvRoi;
    unsigned int width = 0, height = 0;
    unsigned int bufferSize = 0;
    unsigned char *buffer = NULL;

    if (mvSources.empty()) {
        LOGE("mvSources should contain one mv source.");
        return MEDIA_VISION_ERROR_INVALID_PARAMETER;
    }

    // Only one input source can be requested for inference as of now.
    if (mvSources.size() > 1) {
        LOGE("Only one mv source is allowed for inference.");
        return MEDIA_VISION_ERROR_INVALID_PARAMETER;
    }

    // TODO. Consider multiple sources.
    mv_source_h mvSource = mvSources.front();
    mv_rectangle_s *roi = rects.empty() ? NULL : &(rects.front());

    mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID;

    if (mv_source_get_width(mvSource, &width) != MEDIA_VISION_ERROR_NONE ||
        mv_source_get_height(mvSource, &height) !=
                MEDIA_VISION_ERROR_NONE ||
        mv_source_get_colorspace(mvSource, &colorspace) !=
                MEDIA_VISION_ERROR_NONE ||
        mv_source_get_buffer(mvSource, &buffer, &bufferSize))
        return MEDIA_VISION_ERROR_INTERNAL;

    // TODO. Support various color spaces.
    if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) {
        LOGE("Not supported format!\n");
        return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT;
    }

    if (roi == NULL) {
        cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3),
                           buffer).clone();
    } else {
        // Clamp the ROI to the source bounds before cropping.
        cvRoi.x = roi->point.x;
        cvRoi.y = roi->point.y;
        cvRoi.width = (roi->point.x + roi->width) >= (int) width ?
                              (int) width - roi->point.x :
                              roi->width;
        cvRoi.height = (roi->point.y + roi->height) >= (int) height ?
                               (int) height - roi->point.y :
                               roi->height;
        cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3),
                           buffer)(cvRoi).clone();
    }

    LOGI("Size: w:%d, h:%d", cvSource.size().width, cvSource.size().height);

    if (mCh != 1 && mCh != 3) {
        LOGE("Channel not supported.");
        return MEDIA_VISION_ERROR_INVALID_PARAMETER;
    }

    std::vector<inference_engine_tensor_buffer>::iterator iter;
    for (iter = mInputTensorBuffers.begin();
         iter != mInputTensorBuffers.end(); iter++) {
        inference_engine_tensor_buffer tensor_buffer = *iter;

        int data_type = ConvertToCv(tensor_buffer.data_type);

        // Convert the color space of the input tensor data and normalize it.
        ret = Preprocess(cvSource,
                         cv::Mat(mInputSize.height, mInputSize.width,
                                 data_type, tensor_buffer.buffer),
                         data_type);
        if (ret != MEDIA_VISION_ERROR_NONE) {
            LOGE("Fail to preprocess input tensor data.");
            return ret;
        }
    }

    ret = mBackend->Run(mInputTensorBuffers, mOutputTensorBuffers);

    return ConvertEngineErrorToVisionError(ret);
}

std::pair<std::string, bool>
Inference::GetSupportedInferenceBackend(int backend)
{
    return mSupportedInferenceBackend[backend];
}

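// Top-N selection below uses a min-heap (std::priority_queue with
// std::greater) of (confidence, class index) pairs: every score is pushed,
// and once the heap exceeds mOutputNumbers the smallest is popped, leaving
// the N best, which are then reversed into descending order.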
int Inference::GetClassficationResults(
        ImageClassificationResults *classificationResults)
{
    tensor_t outputData;

    // Get the inference result into outputData.
    int ret = FillOutputResult(outputData);
    if (ret != MEDIA_VISION_ERROR_NONE) {
        LOGE("Fail to get output result.");
        return ret;
    }

    // Will contain the top N results in ascending order.
    std::vector<std::pair<float, int> > top_results;
    std::priority_queue<std::pair<float, int>,
                        std::vector<std::pair<float, int> >,
                        std::greater<std::pair<float, int> > >
            top_result_pq;
    float value = 0.0f;

    std::vector<std::vector<int> > inferDimInfo(outputData.dimInfo);
    std::vector<void *> inferResults(outputData.data.begin(),
                                     outputData.data.end());

    int count = inferDimInfo[0][1];
    LOGI("count: %d", count);

    float *prediction = reinterpret_cast<float *>(inferResults[0]);
    for (int i = 0; i < count; ++i) {
        value = prediction[i];

        // Push every score; the pop below keeps only the current top N.
        top_result_pq.push(std::pair<float, int>(value, i));

        // If at capacity, kick the smallest value out.
        if (top_result_pq.size() > static_cast<size_t>(mOutputNumbers)) {
            top_result_pq.pop();
        }
    }

    // Copy to the output vector and reverse into descending order.
    while (!top_result_pq.empty()) {
        top_results.push_back(top_result_pq.top());
        top_result_pq.pop();
    }
    std::reverse(top_results.begin(), top_results.end());

    int classIdx = -1;
    ImageClassificationResults results;
    results.number_of_classes = 0;
    for (size_t idx = 0; idx < top_results.size(); ++idx) {
        if (top_results[idx].first < mThreshold)
            continue;

        LOGI("idx:%zu", idx);
        LOGI("classIdx: %d", top_results[idx].second);
        LOGI("classProb: %f", top_results[idx].first);

        classIdx = top_results[idx].second;
        results.indices.push_back(classIdx);
        results.confidences.push_back(top_results[idx].first);
        results.names.push_back(mUserListName[classIdx]);
        results.number_of_classes++;
    }

    *classificationResults = results;
    LOGI("Inference: GetClassificationResults: %d\n",
         results.number_of_classes);
    return MEDIA_VISION_ERROR_NONE;
}

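// Detection output parsing handles two layouts: a single post-processed
// tensor of shape 1x1xNx7 (one row per detection: image id, class, score,
// then the box as left/top/right/bottom ratios), or four separate tensors
// (boxes, classes, scores, detection count). Box ratios are mapped back to
// pixels using the original source size saved in Preprocess().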
int Inference::GetObjectDetectionResults(
        ObjectDetectionResults *detectionResults)
{
    tensor_t outputData;

    // Get the inference result into outputData.
    int ret = FillOutputResult(outputData);
    if (ret != MEDIA_VISION_ERROR_NONE) {
        LOGE("Fail to get output result.");
        return ret;
    }

    // In case of object detection, some models apply post-processing and
    // others don't, so the two cases are handled separately.
    std::vector<std::vector<int> > inferDimInfo(outputData.dimInfo);
    LOGI("inferDimInfo size: %zu", outputData.dimInfo.size());

    std::vector<void *> inferResults(outputData.data.begin(),
                                     outputData.data.end());
    LOGI("inferResults size: %zu", inferResults.size());

    float *boxes = nullptr;
    float *classes = nullptr;
    float *scores = nullptr;
    int number_of_detections = 0;

    cv::Mat cvScores, cvClasses, cvBoxes;
    if (outputData.dimInfo.size() == 1) {
        // There is no way to know how many objects were detected unless the
        // count is provided explicitly, in which case each backend has to
        // supply it manually. For example, with OpenCV, MobilenetV1-SSD
        // doesn't provide it, so the count is written into the first element
        // of outputData.data[0]. The shape is 1x1xNx7, and the first of the
        // 7 values is the image id, which is useless unless batch mode is
        // supported; so reuse that first value as the detection count.
        number_of_detections = static_cast<int>(
                *reinterpret_cast<float *>(outputData.data[0]));
        cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3],
                             CV_32F, outputData.data[0]);

        // boxes
        cv::Mat cvLeft = cvOutputData.col(3).clone();
        cv::Mat cvTop = cvOutputData.col(4).clone();
        cv::Mat cvRight = cvOutputData.col(5).clone();
        cv::Mat cvBottom = cvOutputData.col(6).clone();

        cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
        cv::hconcat(cvBoxElems, 4, cvBoxes);

        // classes
        cvClasses = cvOutputData.col(1).clone();

        // scores
        cvScores = cvOutputData.col(2).clone();

        boxes = cvBoxes.ptr<float>(0);
        classes = cvClasses.ptr<float>(0);
        scores = cvScores.ptr<float>(0);
    } else {
        boxes = reinterpret_cast<float *>(inferResults[0]);
        classes = reinterpret_cast<float *>(inferResults[1]);
        scores = reinterpret_cast<float *>(inferResults[2]);
        number_of_detections = static_cast<int>(
                *reinterpret_cast<float *>(inferResults[3]));
    }

    LOGI("number_of_detections = %d", number_of_detections);

    int left, top, right, bottom;
    cv::Rect loc;

    ObjectDetectionResults results;
    results.number_of_objects = 0;
    for (int idx = 0; idx < number_of_detections; ++idx) {
        if (scores[idx] < mThreshold)
            continue;

        left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
        top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
        right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
        bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);

        loc.x = left;
        loc.y = top;
        loc.width = right - left + 1;
        loc.height = bottom - top + 1;

        results.indices.push_back(static_cast<int>(classes[idx]));
        results.confidences.push_back(scores[idx]);
        results.names.push_back(
                mUserListName[static_cast<int>(classes[idx])]);
        results.locations.push_back(loc);
        results.number_of_objects++;

        LOGI("objectClass: %d", static_cast<int>(classes[idx]));
        LOGI("confidence:%f", scores[idx]);
        LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right,
             bottom);
    }

    *detectionResults = results;
    LOGI("Inference: GetObjectDetectionResults: %d\n",
         results.number_of_objects);
    return MEDIA_VISION_ERROR_NONE;
}

int Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults)
{
    tensor_t outputData;

    // Get the inference result into outputData.
    int ret = FillOutputResult(outputData);
    if (ret != MEDIA_VISION_ERROR_NONE) {
        LOGE("Fail to get output result.");
        return ret;
    }

    // Face detection models come in the same two output layouts handled in
    // GetObjectDetectionResults() above: a single post-processed 1x1xNx7
    // tensor, or four separate tensors.
    std::vector<std::vector<int> > inferDimInfo(outputData.dimInfo);
    LOGI("inferDimInfo size: %zu", outputData.dimInfo.size());

    std::vector<void *> inferResults(outputData.data.begin(),
                                     outputData.data.end());
    LOGI("inferResults size: %zu", inferResults.size());

    float *boxes = nullptr;
    float *classes = nullptr;
    float *scores = nullptr;
    int number_of_detections = 0;

    cv::Mat cvScores, cvClasses, cvBoxes;
    if (outputData.dimInfo.size() == 1) {
        number_of_detections = static_cast<int>(
                *reinterpret_cast<float *>(outputData.data[0]));
        cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3],
                             CV_32F, outputData.data[0]);

        // boxes
        cv::Mat cvLeft = cvOutputData.col(3).clone();
        cv::Mat cvTop = cvOutputData.col(4).clone();
        cv::Mat cvRight = cvOutputData.col(5).clone();
        cv::Mat cvBottom = cvOutputData.col(6).clone();

        cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight };
        cv::hconcat(cvBoxElems, 4, cvBoxes);

        // classes
        cvClasses = cvOutputData.col(1).clone();

        // scores
        cvScores = cvOutputData.col(2).clone();

        boxes = cvBoxes.ptr<float>(0);
        classes = cvClasses.ptr<float>(0);
        scores = cvScores.ptr<float>(0);
    } else {
        boxes = reinterpret_cast<float *>(inferResults[0]);
        classes = reinterpret_cast<float *>(inferResults[1]);
        scores = reinterpret_cast<float *>(inferResults[2]);
        number_of_detections = static_cast<int>(
                *reinterpret_cast<float *>(inferResults[3]));
    }

    int left, top, right, bottom;
    cv::Rect loc;

    FaceDetectionResults results;
    results.number_of_faces = 0;
    for (int idx = 0; idx < number_of_detections; ++idx) {
        if (scores[idx] < mThreshold)
            continue;

        left = static_cast<int>(boxes[idx * 4 + 1] * mSourceSize.width);
        top = static_cast<int>(boxes[idx * 4 + 0] * mSourceSize.height);
        right = static_cast<int>(boxes[idx * 4 + 3] * mSourceSize.width);
        bottom = static_cast<int>(boxes[idx * 4 + 2] * mSourceSize.height);

        loc.x = left;
        loc.y = top;
        loc.width = right - left + 1;
        loc.height = bottom - top + 1;

        results.confidences.push_back(scores[idx]);
        results.locations.push_back(loc);
        results.number_of_faces++;

        LOGI("confidence:%f", scores[idx]);
        LOGI("class: %f", classes[idx]);
        LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx * 4 + 1],
             boxes[idx * 4 + 0], boxes[idx * 4 + 3], boxes[idx * 4 + 2]);
        LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right,
             bottom);
    }

    *detectionResults = results;
    LOGI("Inference: GetFaceDetectionResults: %d\n",
         results.number_of_faces);
    return MEDIA_VISION_ERROR_NONE;
}

int Inference::GetFacialLandMarkDetectionResults(
        FacialLandMarkDetectionResults *detectionResults)
{
    tensor_t outputData;

    // Get the inference result into outputData.
    int ret = FillOutputResult(outputData);
    if (ret != MEDIA_VISION_ERROR_NONE) {
        LOGE("Fail to get output result.");
        return ret;
    }

    std::vector<std::vector<int> > inferDimInfo(outputData.dimInfo);
    std::vector<void *> inferResults(outputData.data.begin(),
                                     outputData.data.end());

    // The output is a flat list of interleaved (x, y) ratio coordinates.
    long number_of_detections = inferDimInfo[0][1];
    float *loc = reinterpret_cast<float *>(inferResults[0]);

    FacialLandMarkDetectionResults results;
    results.number_of_landmarks = 0;

    cv::Point point(0, 0);
    LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height);
    for (int idx = 0; idx < number_of_detections; idx += 2) {
        point.x = static_cast<int>(loc[idx] * mSourceSize.width);
        point.y = static_cast<int>(loc[idx + 1] * mSourceSize.height);

        results.locations.push_back(point);
        results.number_of_landmarks++;

        LOGI("x:%d, y:%d", point.x, point.y);
    }

    *detectionResults = results;
    LOGI("Inference: FacialLandmarkDetectionResults: %d\n",
         results.number_of_landmarks);
    return MEDIA_VISION_ERROR_NONE;
}

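// Pose estimation output is a stack of per-joint heatmaps (one channel per
// joint). For each channel the peak location is found with cv::minMaxLoc()
// and scaled from heatmap space back to source-image space. A Gaussian-
// blurred copy of each heatmap is also computed, though the peak is
// currently taken from the unblurred map.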
int Inference::GetPoseEstimationDetectionResults(
        PoseEstimationResults *detectionResults)
{
    tensor_t outputData;

    // Get the inference result into outputData.
    int ret = FillOutputResult(outputData);
    if (ret != MEDIA_VISION_ERROR_NONE) {
        LOGE("Fail to get output result.");
        return ret;
    }

    std::vector<std::vector<int> > inferDimInfo(outputData.dimInfo);
    std::vector<void *> inferResults(outputData.data.begin(),
                                     outputData.data.end());

    long number_of_pose = inferDimInfo[0][3];
    float *tmp = static_cast<float *>(inferResults[0]);
    cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]);

    cv::Point loc;
    double score = 0.0;
    cv::Mat blurredHeatMap;

    cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]),
                        CV_32FC(inferDimInfo[0][3]), (void *) tmp);

    // Split the multi-channel heatmap tensor into one cv::Mat per joint.
    std::vector<cv::Mat> multiChannels(inferDimInfo[0][3]);
    cv::split(reShapeTest, multiChannels);

    float ratioX = static_cast<float>(mSourceSize.width) /
                   static_cast<float>(inferDimInfo[0][2]);
    float ratioY = static_cast<float>(mSourceSize.height) /
                   static_cast<float>(inferDimInfo[0][1]);

    PoseEstimationResults results;
    results.number_of_pose_estimation = 0;
    for (int poseIdx = 0; poseIdx < number_of_pose; poseIdx++) {
        cv::Mat heatMap = multiChannels[poseIdx];

        cv::GaussianBlur(heatMap, blurredHeatMap, cv::Size(), 5.0, 5.0);
        cv::minMaxLoc(heatMap, NULL, &score, NULL, &loc);

        LOGI("PoseIdx[%2d]: x[%2d], y[%2d], score[%.3f]", poseIdx, loc.x,
             loc.y, score);
        LOGI("PoseIdx[%2d]: x[%2d], y[%2d], score[%.3f]", poseIdx,
             static_cast<int>(static_cast<float>(loc.x + 1) * ratioX),
             static_cast<int>(static_cast<float>(loc.y + 1) * ratioY),
             score);

        loc.x = static_cast<int>(static_cast<float>(loc.x + 1) * ratioX);
        loc.y = static_cast<int>(static_cast<float>(loc.y + 1) * ratioY);
        results.locations.push_back(loc);
        results.number_of_pose_estimation++;
    }

    *detectionResults = results;
    LOGI("Inference: PoseEstimationResults: %d\n",
         results.number_of_pose_estimation);
    return MEDIA_VISION_ERROR_NONE;
}