mv_machine_learning: use camel notation
[platform/core/api/mediavision.git] / mv_machine_learning / inference / src / mv_inference_open.cpp
1 /**
2  * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "mv_private.h"
18 #include "mv_inference_open.h"
19
20 #include "Inference.h"
21 #include "Posture.h"
22
23 #include <new>
24 #include <unistd.h>
25 #include <string>
26
27 using namespace mediavision::inference;
28
29 mv_engine_config_h mv_inference_get_engine_config(mv_inference_h infer)
30 {
31         Inference *pInfer = static_cast<Inference *>(infer);
32
33         return pInfer->getEngineConfig();
34 }
35
36 int mv_inference_create_open(mv_inference_h *infer)
37 {
38         if (infer == NULL) {
39                 LOGE("Handle can't be created because handle pointer is NULL");
40                 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
41         }
42
43         (*infer) = static_cast<mv_inference_h>(new (std::nothrow) Inference());
44
45         if (*infer == NULL) {
46                 LOGE("Failed to create inference handle");
47                 return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
48         }
49
50         LOGD("Inference handle [%p] has been created", *infer);
51
52         return MEDIA_VISION_ERROR_NONE;
53 }
54
55 int mv_inference_destroy_open(mv_inference_h infer)
56 {
57         if (!infer) {
58                 LOGE("Handle can't be destroyed because handle is NULL");
59                 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
60         }
61
62         LOGD("Destroying inference handle [%p]", infer);
63         delete static_cast<Inference *>(infer);
64         LOGD("Inference handle has been destroyed");
65
66         return MEDIA_VISION_ERROR_NONE;
67 }
68
/**
 * @brief Checks whether a file path carries an explicit ".json" extension.
 *
 * Fixes an edge case in the previous implementation: for a name without any
 * dot (e.g. a file simply called "json"), find_last_of() returned npos and
 * npos + 1 wrapped to 0, so the whole name was compared and "json" matched.
 *
 * @param fileName path or file name to inspect
 * @return true only when the text after the last '.' is exactly "json"
 */
static bool IsJsonFile(const std::string &fileName)
{
	const size_t extPos = fileName.find_last_of('.');

	// No extension separator at all -> cannot be a json file.
	if (extPos == std::string::npos)
		return false;

	return fileName.substr(extPos + 1) == "json";
}
73
74 static bool IsValidBackendType(const int backend_type)
75 {
76         return (backend_type > MV_INFERENCE_BACKEND_NONE && backend_type < MV_INFERENCE_BACKEND_MAX);
77 }
78
79 static bool IsConfigFilePathRequired(const int target_device_type, const int backend_type)
80 {
81         LOGW("DEPRECATION WARNING : MV_INFERENCE_BACKEND_MLAPI type is deprecated and will be removed from next release.");
82
83         // In case of MV_INFERENCE_TARGET_DEVICE_CUSTOM via MLAPI backend, config file path is required.
84         return (backend_type == MV_INFERENCE_BACKEND_MLAPI && target_device_type & MV_INFERENCE_TARGET_DEVICE_CUSTOM);
85 }
86
87 static int configure_tensor_info_from_meta_file(Inference *pInfer, mv_engine_config_h engine_config)
88 {
89         char *modelMetaFilePath = NULL;
90
91         int ret =
92                         mv_engine_config_get_string_attribute(engine_config, MV_INFERENCE_MODEL_META_FILE_PATH, &modelMetaFilePath);
93         if (ret != MEDIA_VISION_ERROR_NONE) {
94                 LOGE("Fail to get model meta file path");
95                 goto out_of_function;
96         }
97
98         if (std::string(modelMetaFilePath).empty()) {
99                 LOGW("Skip ParseMetadata and run without Metadata");
100                 ret = MEDIA_VISION_ERROR_INVALID_OPERATION;
101                 goto release_model_meta_file_path;
102         }
103
104         if (!IsJsonFile(std::string(modelMetaFilePath))) {
105                 ret = MEDIA_VISION_ERROR_INVALID_PATH;
106                 LOGE("Model meta file should be json");
107                 goto release_model_meta_file_path;
108         }
109
110         ret = pInfer->parseMetadata(std::string(modelMetaFilePath));
111         if (ret != MEDIA_VISION_ERROR_NONE) {
112                 LOGE("Fail to ParseMetadata");
113         }
114
115 release_model_meta_file_path:
116         free(modelMetaFilePath);
117
118 out_of_function:
119         LOGI("LEAVE");
120
121         return ret;
122 }
123
// Reads the model file paths (config / weight / user-label) and backend type
// from the engine config, validates them, and hands them to the Inference
// object. Uses a goto cleanup ladder so every successfully fetched string is
// freed exactly once, in reverse acquisition order.
//
// Returns MEDIA_VISION_ERROR_NONE on success; INVALID_PARAMETER for a bad
// backend type; INVALID_PATH when a required file is not accessible.
static int configure_model_open(Inference *pInfer, mv_engine_config_h engine_config)
{
	LOGI("ENTER");

	char *modelConfigFilePath = NULL;
	char *modelWeightFilePath = NULL;
	char *modelUserFilePath = NULL;
	int backendType = 0;
	size_t userFileLength = 0;

	int ret = mv_engine_config_get_string_attribute(engine_config, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH,
													&modelConfigFilePath);
	if (ret != MEDIA_VISION_ERROR_NONE) {
		LOGE("Fail to get model configuration file path");
		goto out_of_function;
	}

	ret = mv_engine_config_get_string_attribute(engine_config, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
												&modelWeightFilePath);
	if (ret != MEDIA_VISION_ERROR_NONE) {
		LOGE("Fail to get model weight file path");
		goto release_model_config_file_path;
	}

	ret = mv_engine_config_get_string_attribute(engine_config, MV_INFERENCE_MODEL_USER_FILE_PATH, &modelUserFilePath);
	if (ret != MEDIA_VISION_ERROR_NONE) {
		LOGE("Fail to get model user file path");
		goto release_model_weight_file_path;
	}

	ret = mv_engine_config_get_int_attribute(engine_config, MV_INFERENCE_BACKEND_TYPE, &backendType);
	if (ret != MEDIA_VISION_ERROR_NONE) {
		LOGE("Fail to get inference backend type");
		goto release_model_user_file_path;
	}

	if (!IsValidBackendType(backendType)) {
		LOGE("Invalid backend type(%d).", backendType);
		ret = MEDIA_VISION_ERROR_INVALID_PARAMETER;
		goto release_model_user_file_path;
	}

	// The weight file is always mandatory; access() returns non-zero when missing.
	if (access(modelWeightFilePath, F_OK)) {
		LOGE("weightFilePath in [%s] ", modelWeightFilePath);
		ret = MEDIA_VISION_ERROR_INVALID_PATH;
		goto release_model_user_file_path;
	}

	// The config file is only required for specific backend/device combinations.
	if (IsConfigFilePathRequired(pInfer->getTargetType(), backendType)) {
		if (access(modelConfigFilePath, F_OK)) {
			LOGE("modelConfigFilePath in [%s] ", modelConfigFilePath);
			ret = MEDIA_VISION_ERROR_INVALID_PATH;
			goto release_model_user_file_path;
		}
	}

	userFileLength = strlen(modelUserFilePath);

	// The user (label) file is optional: an empty path skips the existence check.
	if (userFileLength > 0 && access(modelUserFilePath, F_OK)) {
		LOGE("categoryFilePath in [%s] ", modelUserFilePath);
		ret = MEDIA_VISION_ERROR_INVALID_PATH;
		goto release_model_user_file_path;
	}

	pInfer->configureModelFiles(std::string(modelConfigFilePath), std::string(modelWeightFilePath),
								std::string(modelUserFilePath));

release_model_user_file_path:
	free(modelUserFilePath);

release_model_weight_file_path:
	free(modelWeightFilePath);

release_model_config_file_path:
	free(modelConfigFilePath);

out_of_function:
	LOGI("LEAVE");

	return ret;
}
205
206 static int configure_input_info_open(Inference *pInfer, mv_engine_config_h engine_config)
207 {
208         LOGI("ENTER");
209
210         int tensorWidth, tensorHeight, tensorCh;
211         double meanValue, stdValue;
212         char *node_name = NULL;
213         int dataType = 0;
214
215         int ret = mv_engine_config_get_int_attribute(engine_config, MV_INFERENCE_INPUT_TENSOR_WIDTH, &tensorWidth);
216         if (ret != MEDIA_VISION_ERROR_NONE) {
217                 LOGE("Fail to get tensor width");
218                 goto out_of_function;
219         }
220
221         ret = mv_engine_config_get_int_attribute(engine_config, MV_INFERENCE_INPUT_TENSOR_HEIGHT, &tensorHeight);
222         if (ret != MEDIA_VISION_ERROR_NONE) {
223                 LOGE("Fail to get tensor height");
224                 goto out_of_function;
225         }
226
227         ret = mv_engine_config_get_int_attribute(engine_config, MV_INFERENCE_INPUT_TENSOR_CHANNELS, &tensorCh);
228         if (ret != MEDIA_VISION_ERROR_NONE) {
229                 LOGE("Fail to get tensor channels");
230                 goto out_of_function;
231         }
232
233         ret = mv_engine_config_get_double_attribute(engine_config, MV_INFERENCE_MODEL_MEAN_VALUE, &meanValue);
234         if (ret != MEDIA_VISION_ERROR_NONE) {
235                 LOGE("Fail to get meanValue");
236                 goto out_of_function;
237         }
238
239         ret = mv_engine_config_get_double_attribute(engine_config, MV_INFERENCE_MODEL_STD_VALUE, &stdValue);
240         if (ret != MEDIA_VISION_ERROR_NONE) {
241                 LOGE("Fail to get stdValue");
242                 goto out_of_function;
243         }
244
245         ret = mv_engine_config_get_int_attribute(engine_config, MV_INFERENCE_INPUT_DATA_TYPE, &dataType);
246         if (ret != MEDIA_VISION_ERROR_NONE) {
247                 LOGE("Fail to get a input tensor data type");
248                 goto out_of_function;
249         }
250
251         ret = mv_engine_config_get_string_attribute(engine_config, MV_INFERENCE_INPUT_NODE_NAME, &node_name);
252         if (ret != MEDIA_VISION_ERROR_NONE) {
253                 LOGE("Fail to get tensor width");
254                 goto out_of_function;
255         }
256
257         pInfer->configureInputInfo(tensorWidth, tensorHeight, 1, tensorCh, stdValue, meanValue, dataType,
258                                                            std::vector<std::string>(1, std::string(node_name)));
259
260         free(node_name);
261         node_name = NULL;
262
263 out_of_function:
264         LOGI("LEAVE");
265
266         return ret;
267 }
268
269 int mv_inference_configure_engine_open(mv_inference_h infer, mv_engine_config_h engine_config)
270 {
271         LOGI("ENTER");
272
273         Inference *pInfer = static_cast<Inference *>(infer);
274         int backendType = 0;
275         int targetTypes = 0;
276
277         pInfer->setEngineConfig(engine_config);
278
279         int ret = mv_engine_config_get_int_attribute(engine_config, MV_INFERENCE_BACKEND_TYPE, &backendType);
280         if (ret != MEDIA_VISION_ERROR_NONE) {
281                 LOGE("Fail to get inference backend type");
282                 goto out_of_function;
283         }
284
285         ret = mv_engine_config_get_int_attribute(engine_config, MV_INFERENCE_TARGET_DEVICE_TYPE, &targetTypes);
286         if (ret != MEDIA_VISION_ERROR_NONE) {
287                 LOGE("Fail to get inference target type");
288                 goto out_of_function;
289         }
290
291         // Set target device type.
292         if (pInfer->configureTargetDevices(targetTypes) != MEDIA_VISION_ERROR_NONE) {
293                 LOGE("Tried to configure invalid target types.");
294                 goto out_of_function;
295         }
296
297         // Create a inference-engine-common class object and load its corresponding library.
298         // Ps. Inference engine gets a capability from a given backend by Bind call
299         // so access to mBackendCapacity should be done after Bind.
300         ret = pInfer->bind(backendType, targetTypes);
301         if (ret != MEDIA_VISION_ERROR_NONE) {
302                 LOGE("Fail to bind a backend engine.");
303                 goto out_of_function;
304         }
305
306 out_of_function:
307         LOGI("LEAVE");
308
309         return ret;
310 }
311
312 int mv_inference_configure_output_open(mv_inference_h infer, mv_engine_config_h engine_config)
313 {
314         LOGI("ENTER");
315
316         Inference *pInfer = static_cast<Inference *>(infer);
317         int maxOutput = 0;
318
319         int ret = mv_engine_config_get_int_attribute(engine_config, MV_INFERENCE_OUTPUT_MAX_NUMBER, &maxOutput);
320         if (ret != MEDIA_VISION_ERROR_NONE) {
321                 LOGE("Fail to get inference output maximum numbers");
322                 goto out_of_function;
323         }
324
325         pInfer->configureOutput(maxOutput);
326
327 out_of_function:
328         LOGI("LEAVE");
329
330         return ret;
331 }
332
333 int mv_inference_configure_confidence_threshold_open(mv_inference_h infer, mv_engine_config_h engine_config)
334 {
335         LOGI("ENTER");
336
337         Inference *pInfer = static_cast<Inference *>(infer);
338         double threshold = 0;
339
340         int ret = mv_engine_config_get_double_attribute(engine_config, MV_INFERENCE_CONFIDENCE_THRESHOLD, &threshold);
341         if (ret != MEDIA_VISION_ERROR_NONE) {
342                 LOGE("Fail to get inference confidence threshold value");
343                 goto out_of_function;
344         }
345
346         pInfer->configureThreshold(threshold);
347
348 out_of_function:
349         LOGI("LEAVE");
350
351         return ret;
352 }
353
354 static int configure_post_process_info_open(Inference *pInfer, mv_engine_config_h engine_config)
355 {
356         LOGI("ENTER");
357
358         int maxOutput = 0;
359         double threshold = 0;
360
361         int ret = mv_engine_config_get_int_attribute(engine_config, MV_INFERENCE_OUTPUT_MAX_NUMBER, &maxOutput);
362         if (ret != MEDIA_VISION_ERROR_NONE) {
363                 LOGE("Fail to get inference output maximum numbers");
364                 goto out_of_function;
365         }
366
367         pInfer->configureOutput(maxOutput);
368
369         ret = mv_engine_config_get_double_attribute(engine_config, MV_INFERENCE_CONFIDENCE_THRESHOLD, &threshold);
370         if (ret != MEDIA_VISION_ERROR_NONE) {
371                 LOGE("Fail to get inference confidence threshold value");
372                 goto out_of_function;
373         }
374
375         pInfer->configureThreshold(threshold);
376
377 out_of_function:
378         LOGI("LEAVE");
379
380         return ret;
381 }
382
383 static int configure_output_info_open(Inference *pInfer, mv_engine_config_h engine_config)
384 {
385         LOGI("ENTER");
386
387         int idx = 0;
388         char **node_names = NULL;
389         int size = 0;
390         std::vector<std::string> names;
391
392         int ret = mv_engine_config_get_array_string_attribute(engine_config, MV_INFERENCE_OUTPUT_NODE_NAMES, &node_names,
393                                                                                                                   &size);
394         if (ret != MEDIA_VISION_ERROR_NONE) {
395                 LOGE("Fail to get _output_node_names");
396                 return ret;
397         }
398
399         for (idx = 0; idx < size; ++idx)
400                 names.push_back(std::string(node_names[idx]));
401
402         std::vector<inference_engine_tensor_info> tensors_info;
403
404         pInfer->configureOutputInfo(names, tensors_info);
405
406         if (node_names) {
407                 for (idx = 0; idx < size; ++idx)
408                         free(node_names[idx]);
409
410                 free(node_names);
411                 node_names = NULL;
412         }
413
414         LOGI("LEAVE");
415
416         return ret;
417 }
418
// Prepares the inference handle for running: configures model files, then
// tensor information (preferring the metadata json file, falling back to the
// legacy per-attribute configuration when no metadata is available), and
// finally asks the backend to load the model files.
int mv_inference_prepare_open(mv_inference_h infer)
{
	LOGI("ENTER");

	Inference *pInfer = static_cast<Inference *>(infer);
	mv_engine_config_h engine_config = mv_inference_get_engine_config(infer);

	int ret = configure_model_open(pInfer, engine_config);
	if (ret != MEDIA_VISION_ERROR_NONE) {
		LOGE("Fail to configure model");
		return ret;
	}

	// If input and output tensor info aren't configured from model meta file then
	// use legacy way for the configuration. TODO. the legacy way will be deprecated.
	ret = configure_tensor_info_from_meta_file(pInfer, engine_config);
	if (ret != MEDIA_VISION_ERROR_NONE) {
		// Metadata unavailable or invalid: fall back to legacy attributes.
		// input tensor, input layer
		ret = configure_input_info_open(pInfer, engine_config);
		if (ret != MEDIA_VISION_ERROR_NONE) {
			LOGE("Fail to configure input info");
			return ret;
		}

		// output layer
		ret = configure_output_info_open(pInfer, engine_config);
		if (ret != MEDIA_VISION_ERROR_NONE) {
			LOGE("Fail to configure output info");
			return ret;
		}

		// maximum candidates, threshold
		ret = configure_post_process_info_open(pInfer, engine_config);
		if (ret != MEDIA_VISION_ERROR_NONE) {
			LOGE("Fail to configure post process info");
			return ret;
		}
	} else {
		// Metadata parsed successfully: derive tensor info from it instead.
		ret = pInfer->configureInputMetaInfo();
		if (ret != MEDIA_VISION_ERROR_NONE) {
			LOGE("Fail to configure input meta info.");
			return ret;
		}

		ret = pInfer->configureOutputMetaInfo();
		if (ret != MEDIA_VISION_ERROR_NONE) {
			LOGE("Fail to configure output meta info.");
			return ret;
		}
	}

	// Request to load model files to a backend engine.
	ret = pInfer->load();
	if (ret != MEDIA_VISION_ERROR_NONE)
		LOGE("Fail to load model files.");

	LOGI("LEAVE");

	return ret;
}
479
480 int mv_inference_foreach_supported_engine_open(mv_inference_h infer, mv_inference_supported_engine_cb callback,
481                                                                                            void *user_data)
482 {
483         LOGI("ENTER");
484
485         Inference *pInfer = static_cast<Inference *>(infer);
486         std::pair<std::string, bool> backend;
487
488         for (int i = 0; i < MV_INFERENCE_BACKEND_MAX; ++i) {
489                 backend = pInfer->getSupportedInferenceBackend(i);
490                 callback((backend.first).c_str(), backend.second, user_data);
491         }
492
493         LOGI("LEAVE");
494
495         return MEDIA_VISION_ERROR_NONE;
496 }
497
498 int mv_inference_image_classify_open(mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi,
499                                                                          mv_inference_image_classified_cb classified_cb, void *user_data)
500 {
501         Inference *pInfer = static_cast<Inference *>(infer);
502         std::vector<mv_source_h> sources;
503         std::vector<mv_rectangle_s> rects;
504
505         sources.push_back(source);
506
507         if (roi != NULL)
508                 rects.push_back(*roi);
509
510         int ret = pInfer->run(sources, rects);
511         if (ret != MEDIA_VISION_ERROR_NONE) {
512                 LOGE("Fail to run inference");
513                 return ret;
514         }
515
516         ImageClassificationResults classificationResults;
517
518         ret = pInfer->getClassficationResults(&classificationResults);
519         if (ret != MEDIA_VISION_ERROR_NONE) {
520                 LOGE("Fail to get inference results");
521                 return ret;
522         }
523
524         int numberOfOutputs = classificationResults.number_of_classes;
525         static const int START_CLASS_NUMBER = 10;
526         static std::vector<const char *> names(START_CLASS_NUMBER);
527
528         if (numberOfOutputs > START_CLASS_NUMBER)
529                 names.resize(numberOfOutputs);
530
531         LOGE("mv_inference_open: number_of_classes: %d\n", numberOfOutputs);
532
533         for (int output_index = 0; output_index < numberOfOutputs; ++output_index) {
534                 LOGE("names: %s", classificationResults.names[output_index].c_str());
535                 names[output_index] = classificationResults.names[output_index].c_str();
536         }
537
538         auto *indices = classificationResults.indices.data();
539         auto *confidences = classificationResults.confidences.data();
540
541         classified_cb(source, numberOfOutputs, indices, names.data(), confidences, user_data);
542
543         return ret;
544 }
545
546 int mv_inference_object_detect_open(mv_source_h source, mv_inference_h infer,
547                                                                         mv_inference_object_detected_cb detected_cb, void *user_data)
548 {
549         Inference *pInfer = static_cast<Inference *>(infer);
550         std::vector<mv_source_h> sources;
551         std::vector<mv_rectangle_s> rects;
552
553         sources.push_back(source);
554         int ret = pInfer->run(sources, rects);
555         if (ret != MEDIA_VISION_ERROR_NONE) {
556                 LOGE("Fail to run inference");
557                 return ret;
558         }
559
560         ObjectDetectionResults objectDetectionResults;
561
562         ret = pInfer->getObjectDetectionResults(&objectDetectionResults);
563         if (ret != MEDIA_VISION_ERROR_NONE) {
564                 LOGE("Fail to get inference results");
565                 return ret;
566         }
567
568         int numberOfOutputs = objectDetectionResults.number_of_objects;
569         static const int START_OBJECT_NUMBER = 20;
570         static std::vector<const char *> names(START_OBJECT_NUMBER);
571         static std::vector<mv_rectangle_s> locations(START_OBJECT_NUMBER);
572
573         if (numberOfOutputs > START_OBJECT_NUMBER) {
574                 names.resize(numberOfOutputs);
575                 locations.resize(numberOfOutputs);
576         }
577
578         for (int output_idx = 0; output_idx < numberOfOutputs; ++output_idx) {
579                 LOGI("names: %s", objectDetectionResults.names[output_idx].c_str());
580                 names[output_idx] = objectDetectionResults.names[output_idx].c_str();
581
582                 locations[output_idx].point.x = objectDetectionResults.locations[output_idx].x;
583                 locations[output_idx].point.y = objectDetectionResults.locations[output_idx].y;
584                 locations[output_idx].width = objectDetectionResults.locations[output_idx].width;
585                 locations[output_idx].height = objectDetectionResults.locations[output_idx].height;
586                 LOGI("%d, %d, %d, %d", locations[output_idx].point.x, locations[output_idx].point.y,
587                          locations[output_idx].width, locations[output_idx].height);
588         }
589
590         int *indices = objectDetectionResults.indices.data();
591         float *confidences = objectDetectionResults.confidences.data();
592
593         detected_cb(source, numberOfOutputs, indices, names.data(), confidences, locations.data(), user_data);
594
595         return ret;
596 }
597
598 int mv_inference_face_detect_open(mv_source_h source, mv_inference_h infer, mv_inference_face_detected_cb detected_cb,
599                                                                   void *user_data)
600 {
601         Inference *pInfer = static_cast<Inference *>(infer);
602         std::vector<mv_source_h> sources;
603         std::vector<mv_rectangle_s> rects;
604
605         sources.push_back(source);
606
607         int ret = pInfer->run(sources, rects);
608         if (ret != MEDIA_VISION_ERROR_NONE) {
609                 LOGE("Fail to run inference");
610                 return ret;
611         }
612
613         FaceDetectionResults faceDetectionResults;
614
615         ret = pInfer->getFaceDetectionResults(&faceDetectionResults);
616         if (ret != MEDIA_VISION_ERROR_NONE) {
617                 LOGE("Fail to get inference results");
618                 return ret;
619         }
620
621         int numberOfOutputs = faceDetectionResults.number_of_faces;
622         std::vector<mv_rectangle_s> locations(numberOfOutputs);
623
624         for (int output_idx = 0; output_idx < numberOfOutputs; ++output_idx) {
625                 locations[output_idx].point.x = faceDetectionResults.locations[output_idx].x;
626                 locations[output_idx].point.y = faceDetectionResults.locations[output_idx].y;
627                 locations[output_idx].width = faceDetectionResults.locations[output_idx].width;
628                 locations[output_idx].height = faceDetectionResults.locations[output_idx].height;
629         }
630
631         float *confidences = faceDetectionResults.confidences.data();
632
633         detected_cb(source, numberOfOutputs, confidences, locations.data(), user_data);
634
635         return ret;
636 }
637
638 int mv_inference_facial_landmark_detect_open(mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi,
639                                                                                          mv_inference_facial_landmark_detected_cb detected_cb, void *user_data)
640 {
641         Inference *pInfer = static_cast<Inference *>(infer);
642         std::vector<mv_source_h> sources;
643         std::vector<mv_rectangle_s> rects;
644
645         sources.push_back(source);
646
647         if (roi != NULL)
648                 rects.push_back(*roi);
649
650         int ret = pInfer->run(sources, rects);
651         if (ret != MEDIA_VISION_ERROR_NONE) {
652                 LOGE("Fail to run inference");
653                 return ret;
654         }
655
656         FacialLandMarkDetectionResults facialLandMarkDetectionResults;
657
658         ret = pInfer->getFacialLandMarkDetectionResults(&facialLandMarkDetectionResults);
659         if (ret != MEDIA_VISION_ERROR_NONE) {
660                 LOGE("Fail to get inference results");
661                 return ret;
662         }
663
664         int numberOfLandmarks = facialLandMarkDetectionResults.number_of_landmarks;
665         std::vector<mv_point_s> locations(numberOfLandmarks);
666
667         for (int landmark_idx = 0; landmark_idx < numberOfLandmarks; ++landmark_idx) {
668                 locations[landmark_idx].x = facialLandMarkDetectionResults.locations[landmark_idx].x;
669                 locations[landmark_idx].y = facialLandMarkDetectionResults.locations[landmark_idx].y;
670         }
671
672         detected_cb(source, numberOfLandmarks, locations.data(), user_data);
673
674         return ret;
675 }
676
// Runs pose landmark detection on a source (optionally within a ROI) and
// reports the resulting pose set through detected_cb. The pose result handed
// to the callback is owned by the unique_ptr below, so it is only valid for
// the duration of the callback invocation.
int mv_inference_pose_landmark_detect_open(mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi,
										   mv_inference_pose_landmark_detected_cb detected_cb, void *user_data)
{
	Inference *pInfer = static_cast<Inference *>(infer);
	unsigned int width, height;

	// Source dimensions are needed to scale the landmark coordinates.
	int ret = mv_source_get_width(source, &width);
	if (ret != MEDIA_VISION_ERROR_NONE) {
		LOGE("Fail to get width");
		return ret;
	}

	ret = mv_source_get_height(source, &height);
	if (ret != MEDIA_VISION_ERROR_NONE) {
		LOGE("Fail to get height");
		return ret;
	}

	std::vector<mv_source_h> sources;
	std::vector<mv_rectangle_s> rects;

	sources.push_back(source);

	if (roi != NULL)
		rects.push_back(*roi);

	ret = pInfer->run(sources, rects);
	if (ret != MEDIA_VISION_ERROR_NONE) {
		LOGE("Fail to run inference");
		return ret;
	}

	std::unique_ptr<mv_inference_pose_s> pose;

	ret = pInfer->getPoseLandmarkDetectionResults(pose, width, height);
	if (ret != MEDIA_VISION_ERROR_NONE) {
		LOGE("Fail to get inference results");
		return ret;
	}

	// Debug dump of every landmark in every detected pose.
	for (int pose_index = 0; pose_index < pose->number_of_poses; ++pose_index) {
		for (int landmark_index = 0; landmark_index < pose->number_of_landmarks_per_pose; ++landmark_index) {
			LOGI("PoseIdx[%2d]: x[%d], y[%d], score[%.3f]", landmark_index,
				 pose->landmarks[pose_index][landmark_index].point.x,
				 pose->landmarks[pose_index][landmark_index].point.y,
				 pose->landmarks[pose_index][landmark_index].score);
		}
	}

	// The raw pointer stays owned by `pose`; the callback must not keep it.
	detected_cb(source, static_cast<mv_inference_pose_result_h>(pose.get()), user_data);

	return ret;
}
730
731 int mv_inference_pose_get_number_of_poses_open(mv_inference_pose_result_h result, int *number_of_poses)
732 {
733         mv_inference_pose_s *handle = static_cast<mv_inference_pose_s *>(result);
734         *number_of_poses = handle->number_of_poses;
735         LOGI("%d", *number_of_poses);
736
737         return MEDIA_VISION_ERROR_NONE;
738 }
739
740 int mv_inference_pose_get_number_of_landmarks_open(mv_inference_pose_result_h result, int *number_of_landmarks)
741 {
742         mv_inference_pose_s *handle = static_cast<mv_inference_pose_s *>(result);
743         *number_of_landmarks = handle->number_of_landmarks_per_pose;
744         LOGI("%d", *number_of_landmarks);
745
746         return MEDIA_VISION_ERROR_NONE;
747 }
748
749 int mv_inference_pose_get_landmark_open(mv_inference_pose_result_h result, int pose_index, int part_index,
750                                                                                 mv_point_s *location, float *score)
751 {
752         mv_inference_pose_s *pose_obj = static_cast<mv_inference_pose_s *>(result);
753
754         if (pose_index < 0 || pose_index >= pose_obj->number_of_poses)
755                 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
756
757         if (part_index < 0 || part_index >= pose_obj->number_of_landmarks_per_pose)
758                 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
759
760         *location = pose_obj->landmarks[pose_index][part_index].point;
761         *score = pose_obj->landmarks[pose_index][part_index].score;
762         LOGI("[%d]:(%dx%d) - %.4f", pose_index, location->x, location->y, *score);
763
764         return MEDIA_VISION_ERROR_NONE;
765 }
766
767 int mv_inference_pose_get_label_open(mv_inference_pose_result_h result, int pose_index, int *label)
768 {
769         mv_inference_pose_s *pose_obj = static_cast<mv_inference_pose_s *>(result);
770
771         if (pose_index < 0 || pose_index >= pose_obj->number_of_poses)
772                 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
773
774         *label = pose_obj->landmarks[pose_index][0].label;
775         LOGI("[%d]: label(%d)", pose_index, *label);
776
777         return MEDIA_VISION_ERROR_NONE;
778 }
779
780 int mv_pose_create_open(mv_pose_h *pose)
781 {
782         if (pose == NULL) {
783                 LOGE("Handle can't be created because handle pointer is NULL");
784                 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
785         }
786
787         (*pose) = static_cast<mv_pose_h>(new (std::nothrow) Posture);
788
789         if (*pose == NULL) {
790                 LOGE("Failed to create pose handle");
791                 return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
792         }
793
794         LOGD("Inference handle [%p] has been created", *pose);
795
796         return MEDIA_VISION_ERROR_NONE;
797 }
798
799 int mv_pose_destroy_open(mv_pose_h pose)
800 {
801         if (!pose) {
802                 LOGE("Hand can't be destroyed because handle is NULL");
803                 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
804         }
805
806         LOGD("Destroy pose handle [%p]", pose);
807         delete static_cast<Posture *>(pose);
808         LOGD("Pose handle has been destroyed");
809
810         return MEDIA_VISION_ERROR_NONE;
811 }
812
813 int mv_pose_set_from_file_open(mv_pose_h pose, const char *motionCaptureFilePath, const char *motionMappingFilePath)
814 {
815         Posture *pPose = static_cast<Posture *>(pose);
816
817         // check file
818         if (access(motionCaptureFilePath, F_OK) || access(motionMappingFilePath, F_OK)) {
819                 LOGE("Invalid Motion Capture file path [%s]", motionCaptureFilePath);
820                 LOGE("Invalid Motion Mapping file path [%s]", motionMappingFilePath);
821
822                 return MEDIA_VISION_ERROR_INVALID_PATH;
823         }
824
825         int ret = pPose->setPoseFromFile(std::string(motionCaptureFilePath), std::string(motionMappingFilePath));
826         if (ret != MEDIA_VISION_ERROR_NONE) {
827                 LOGE("Fail to setPoseFromFile");
828                 return ret;
829         }
830
831         return MEDIA_VISION_ERROR_NONE;
832 }
833
834 int mv_pose_compare_open(mv_pose_h pose, mv_inference_pose_result_h action, int parts, float *score)
835 {
836         Posture *pPose = static_cast<Posture *>(pose);
837         std::vector<std::pair<bool, cv::Point> > actionParts;
838         mv_inference_pose_s *pAction = static_cast<mv_inference_pose_s *>(action);
839
840         for (int k = 0; k < HUMAN_POSE_MAX_LANDMARKS; ++k) {
841                 if (pAction->landmarks[0][k].point.x == -1 || pAction->landmarks[0][k].point.y == -1) {
842                         actionParts.push_back(std::make_pair(false, cv::Point(-1, -1)));
843                         continue;
844                 }
845
846                 actionParts.push_back(
847                                 std::make_pair(true, cv::Point(pAction->landmarks[0][k].point.x, pAction->landmarks[0][k].point.y)));
848         }
849
850         int ret = pPose->compare(parts, actionParts, score);
851         if (ret != MEDIA_VISION_ERROR_NONE) {
852                 LOGE("Fail to compare");
853                 return ret;
854         }
855
856         LOGD("score: %1.4f", *score);
857
858         return MEDIA_VISION_ERROR_NONE;
859 }