From: Inki Dae
Date: Fri, 4 Jun 2021 03:46:30 +0000 (+0900)
Subject: Move mv_inference into mv_machine_learning
X-Git-Tag: submit/tizen/20210624.045334~20
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=580dd98e68d435513b14bc99e963f32aa7b4495d;p=platform%2Fcore%2Fapi%2Fmediavision.git

Move mv_inference into mv_machine_learning

Moved the mv_inference directory into a new mv_machine_learning directory.
Mediavision will support not only an inference API but also a learning API,
so this patch creates an mv_machine_learning directory at the top of
Mediavision and moves the existing mv_inference directory into it. This is
the first step toward adding learning API support to Mediavision.

Change-Id: I2d8597064141b2392bfdad0854e08dadd8d98258
Signed-off-by: Inki Dae
---

diff --git a/CMakeLists.txt b/CMakeLists.txt
index dcfeb034..cbf0dd9e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,7 +33,7 @@ set(MV_FACE_LIB_NAME "mv_face" CACHE STRING
     "Name of the library will be built for barcode generating module (without extension).")
 set(MV_SURVEILLANCE_LIB_NAME "mv_surveillance" CACHE STRING
     "Name of the library will be built for surveillance module (without extension).")
-set(MV_INFERENCE_LIB_NAME "mv_inference" CACHE STRING
+set(MV_MACHINE_LEARNING_LIB_NAME "mv_machine_learning" CACHE STRING
     "Name of the library will be built for inference module (without extension).")
@@ -79,9 +79,9 @@ endif()
 if(MEDIA_VISION_INFERENCE_LICENSE_PORT)
 	add_definitions(-DMEDIA_VISION_INFERENCE_LICENSE_PORT)
-	SET(INC_INFERENCE "${PROJECT_SOURCE_DIR}/mv_inference/inference_lic/include")
+	SET(INC_INFERENCE "${PROJECT_SOURCE_DIR}/mv_machine_learning/mv_inference/inference_lic/include")
 else()
-	SET(INC_INFERENCE "${PROJECT_SOURCE_DIR}/mv_inference/inference/include")
+	SET(INC_INFERENCE "${PROJECT_SOURCE_DIR}/mv_machine_learning/mv_inference/inference/include")
 endif()
 
 INCLUDE_DIRECTORIES(${INC_DIR}
@@ -146,7 +146,7 @@ ADD_SUBDIRECTORY(mv_barcode)
 ADD_SUBDIRECTORY(mv_image)
 ADD_SUBDIRECTORY(mv_face)
 ADD_SUBDIRECTORY(mv_surveillance)
-ADD_SUBDIRECTORY(mv_inference)
+ADD_SUBDIRECTORY(mv_machine_learning)
 INSTALL(
     DIRECTORY ${INC_DIR}/ DESTINATION include/media
@@ -158,7 +158,7 @@ INSTALL(
 SET(PC_NAME ${fw_name})
 SET(PC_REQUIRED "${fw_name}-barcode ${fw_name}-face ${fw_name}-image ${fw_name}-surveillance ${fw_name}-inference")
 SET(PC_LDFLAGS "-l${MV_COMMON_LIB_NAME} -l${MV_BARCODE_DETECTOR_LIB_NAME} -l${MV_BARCODE_GENERATOR_LIB_NAME} \
--l${MV_IMAGE_LIB_NAME} -l${MV_FACE_LIB_NAME} -l${MV_SURVEILLANCE_LIB_NAME} -l${MV_INFERENCE_LIB_NAME}")
+-l${MV_IMAGE_LIB_NAME} -l${MV_FACE_LIB_NAME} -l${MV_SURVEILLANCE_LIB_NAME} -l${MV_MACHINE_LEARNING_LIB_NAME}")
 SET(PC_CFLAGS -I\${includedir}/media)
 CONFIGURE_FILE(
@@ -218,7 +218,7 @@ CONFIGURE_FILE(
 INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-surveillance.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
 SET(PC_NAME ${fw_name}-inference)
-SET(PC_LDFLAGS "-l${MV_INFERENCE_LIB_NAME} -l${MV_COMMON_LIB_NAME}")
+SET(PC_LDFLAGS "-l${MV_MACHINE_LEARNING_LIB_NAME} -l${MV_COMMON_LIB_NAME}")
 CONFIGURE_FILE(
     ${fw_name}.pc.in
     ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}-inference.pc
diff --git a/mv_inference/CMakeLists.txt b/mv_inference/CMakeLists.txt
deleted file mode 100644
index 1f034f2d..00000000
--- a/mv_inference/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-project(mv_inference_port)
-cmake_minimum_required(VERSION 2.6)
-
-if(MEDIA_VISION_INFERENCE_LICENSE_PORT)
-	add_subdirectory(${PROJECT_SOURCE_DIR}/inference_lic) # Licensed port
-else()
-	add_subdirectory(${PROJECT_SOURCE_DIR}/inference) # Open port
-endif()
diff --git a/mv_inference/inference/CMakeLists.txt b/mv_inference/inference/CMakeLists.txt
deleted file mode 100644
index 05de57cd..00000000
--- a/mv_inference/inference/CMakeLists.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-project(${MV_INFERENCE_LIB_NAME})
-CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
-
-SET_PROPERTY(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DEBUG _DEBUG)
-
-SET(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR})
-SET(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR})
-SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-
-INCLUDE_DIRECTORIES("${INC_DIR}")
-INCLUDE_DIRECTORIES("${PROJECT_SOURCE_DIR}/include")
-INCLUDE_DIRECTORIES("${PROJECT_SOURCE_DIR}/src")
-
-SET(dependents "inference-engine-interface-common iniparser json-glib-1.0")
-INCLUDE(FindPkgConfig)
-pkg_check_modules(${fw_name} REQUIRED ${dependents})
-FOREACH(flag ${${fw_name}_CFLAGS})
-	SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} ${flag}")
-	SET(EXTRA_CXXFLAGS "${EXTRA_CXXFLAGS} ${flag}")
-ENDFOREACH(flag)
-
-SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXXFLAGS} -Wno-unused-parameter -Wno-sign-compare")
-FILE(GLOB MV_INFERENCE_INCLUDE_LIST "${PROJECT_SOURCE_DIR}/include/*.h" "${PROJECT_SOURCE_DIR}/include/*.hpp")
-FILE(GLOB MV_INFERENCE_SOURCE_LIST "${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp")
-
-FIND_PACKAGE(OpenCV REQUIRED core dnn imgproc)
-if(NOT OpenCV_FOUND)
-	MESSAGE(SEND_ERROR "OpenCV NOT FOUND")
-	RETURN()
-else()
-	INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS})
-endif()
-
-if(FORCED_STATIC_BUILD)
-	ADD_LIBRARY(${PROJECT_NAME} STATIC ${MV_INFERENCE_INCLUDE_LIST} ${MV_INFERENCE_SOURCE_LIST})
-else()
-	ADD_LIBRARY(${PROJECT_NAME} SHARED ${MV_INFERENCE_INCLUDE_LIST} ${MV_INFERENCE_SOURCE_LIST})
-endif()
-
-TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} ${OpenCV_LIBS} inference-engine-interface-common dlog iniparser json-glib-1.0)
-
-INSTALL(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR})
diff --git a/mv_inference/inference/include/Bvh.h b/mv_inference/inference/include/Bvh.h
deleted file mode 100644
index 6b9b8533..00000000
--- a/mv_inference/inference/include/Bvh.h
+++ /dev/null
@@ -1,108 +0,0 @@
-#ifndef __MEDIA_VISION_BVH_H__
-#define __MEDIA_VISION_BVH_H__
-
-#include
-#include
-#include "Joint.h"
-#include
-#include
-
-namespace mediavision
-{
-namespace inference
-{
-
-	/** Class created for storing motion data from bvh file */
-	class Bvh {
-	public:
-		/** Constructor of Bvh object
-		 *  @details Initializes local variables
-		 */
-		Bvh() : num_frames_(0), frame_time_(0), num_channels_(0) {}
-
-		/**
-		 * Recalculation of local transformation matrix for each frame in each joint
-		 *
-		 * Should be called to set local_transformation_matrix vectors in joints
-		 * structures.
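 *
 * A minimal usage sketch (illustrative only; it assumes a BVH file parsed
 * by the BvhParser declared further below, and a placeholder file path):
 * @code
 * mediavision::inference::BvhParser parser;
 * mediavision::inference::Bvh bvh;
 * if (parser.parse("/path/to/motion.bvh", &bvh) == 0) {
 *     bvh.recalculate_joints_ltm();                // NULL resolves to the root joint
 *     cv::Mat root_ltm = bvh.root_joint()->ltm(0); // LTM of frame 0
 * }
 * @endcode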
- * - * @param start_joint A joint of which each child local transformation - * matrix will be recalculated, as default it is NULL which will be resolved - * to root_joint in method body - */ - void recalculate_joints_ltm(std::shared_ptr start_joint = NULL); - - /** Adds joint to Bvh object - * @details Adds joint and increases number of data channels - * @param joint The joint that will be added - */ - void add_joint(const std::shared_ptr joint) { - joints_.push_back(joint); - num_channels_ += joint->num_channels(); - } - - /** Gets the root joint - * @return The root joint - */ - const std::shared_ptr root_joint() const { return root_joint_; } - - /** Gets all joints - * @return The all joints - */ - const std::vector > joints() const { - return joints_; - } - - /** Gets the number of data frames - * @return The number of frames - */ - unsigned num_frames() const { return num_frames_; } - - /** Gets the frame time - * @return The single frame time (in second) - */ - double frame_time() const { return frame_time_; } - - /** Gets the total number of channels - * @return The number of data channels - */ - unsigned num_channels() const { return num_channels_; } - - /** Sets the root joint - * @param arg The root joint to be set - */ - void set_root_joint(const std::shared_ptr arg) { root_joint_ = arg; } - - /** Sets the all joint at once - * @param arg The all joints to be set - */ - void set_joints(const std::vector > arg) { - joints_ = arg; - } - - /** Sets the number of data frames - * @param arg The number of frames to be set - */ - void set_num_frames(const unsigned arg) { num_frames_ = arg; } - - /** Sets the single data frame time - * @param arg The time of frame to be set - */ - void set_frame_time(const double arg) { frame_time_ = arg; } - - private: - /** A root joint in this bvh file */ - std::shared_ptr root_joint_; - /** All joints in file in order of parse */ - std::vector > joints_; - /** A number of motion frames in this bvh file */ - unsigned num_frames_; - /** A time of single frame */ - double frame_time_; - /** Number of channels of all joints */ - unsigned num_channels_; - }; - -} // namespace -} -#endif // __MEDIA_VISION_BVH_H__ diff --git a/mv_inference/inference/include/BvhParser.h b/mv_inference/inference/include/BvhParser.h deleted file mode 100644 index c96bedb3..00000000 --- a/mv_inference/inference/include/BvhParser.h +++ /dev/null @@ -1,78 +0,0 @@ -#ifndef __MEDIA_VISION_BVH_PARSER_H__ -#define __MEDIA_VISION_BVH_PARSER_H__ - -#include "Bvh.h" -#include "Joint.h" - -#include -#include -#include -#include - -namespace mediavision -{ -namespace inference -{ - - /** Bvh Parser class that is responsible for parsing .bvh file */ - class BvhParser { - public: - BvhParser() : bvh_(NULL) {}; - ~BvhParser() = default; - - /** Parses single bvh file and stored data into bvh structure - * @param path The path to file to be parsed - * @param bvh The pointer to bvh object where parsed data will be stored - * @return 0 if success, -1 otherwise - */ - int parse(const std::string& path, Bvh* bvh); - - private: - /** Parses single hierarchy in bvh file - * @param file The input stream that is needed for reading file content - * @return 0 if success, -1 otherwise - */ - int parse_hierarchy(std::ifstream& file); - - /** Parses joint and its children in bvh file - * @param file The input stream that is needed for reading file content - * @param parent The pointer to parent joint - * @param parsed The output parameter, here will be stored parsed joint - * @return 0 if success, -1 
otherwise - */ - int parse_joint(std::ifstream& file, std::shared_ptr parent, - std::shared_ptr & parsed); - - /** Parses order of channel for single joint - * @param file The input stream that is needed for reading file content - * @param joint The pointer to joint that channels order will be parsed - * @return 0 if success, -1 otherwise - */ - int parse_channel_order(std::ifstream& file, std::shared_ptr joint); - - /** Parses motion part data - * @param file The input stream that is needed for reading file content - * @return 0 if success, -1 otherwise - */ - int parse_motion(std::ifstream& file); - - /** Trims the string, removes leading and trailing whitespace from it - * @param s The string, which leading and trailing whitespace will be - * trimmed - */ - inline void trim(std::string &s) { - s.erase( std::remove_if( s.begin(), s.end(), - std::bind( std::isspace, std::placeholders::_1, - std::locale::classic() ) ), s.end() ); - } - - - /** The path to file that was parsed previously */ - std::string path_; - - /** The bvh object to store parsed data */ - Bvh* bvh_; - }; -} -} // namespace -#endif // __MEDIA_VISION_BVH_PARSER_H__ diff --git a/mv_inference/inference/include/BvhUtils.h b/mv_inference/inference/include/BvhUtils.h deleted file mode 100644 index 9d1a131b..00000000 --- a/mv_inference/inference/include/BvhUtils.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef __MEDIA_VISION_BVH_UTILS_H__ -#define __MEDIA_VISION_BVH_UTILS_H__ - -#include -#include - -#include - -#include -#include -namespace mediavision -{ -namespace inference -{ - -/** Enumeration class for axis */ -enum class Axis { - X, - Y, - Z -}; - -/** Creates rotation matrix - * @param angle The rotation angle - * @param axis The rotation axis - * @return The rotation matrix - */ -cv::Mat rotation_matrix(float angle, Axis axis); - -/** Rotates matrix - * @param matrix The matrix to be rotated - * @param angle The rotation angle - * @param axis The rotation axis - * @return The rotation matrix - */ -cv::Mat rotate(cv::Mat matrix, float angle, Axis axis); - -} // namespace -} -#endif //__MEDIA_VISION_BVH_UTILS_H__ \ No newline at end of file diff --git a/mv_inference/inference/include/Inference.h b/mv_inference/inference/include/Inference.h deleted file mode 100644 index 6c88b953..00000000 --- a/mv_inference/inference/include/Inference.h +++ /dev/null @@ -1,388 +0,0 @@ -/** - * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-#ifndef __MEDIA_VISION_INFERENCE_H__
-#define __MEDIA_VISION_INFERENCE_H__
-
-#include
-#include
-
-#include "mv_common.h"
-#include "inference_engine_error.h"
-#include "inference_engine_common_impl.h"
-#include "mv_inference_private.h"
-#include
-#include
-#include
-#include "Metadata.h"
-#include "PreProcess.h"
-#include "PostProcess.h"
-#include "TensorBuffer.h"
-
-#define HUMAN_POSE_MAX_LANDMARKS 16
-#define HUMAN_POSE_MAX_PARTS 6
-
-/**
- * @file Inference.h
- * @brief This file contains the inference class definition which
- *        provides inference interface.
- */
-using namespace InferenceEngineInterface::Common;
-
-typedef struct _ImageClassficationResults {
-	int number_of_classes;
-	std::vector<int> indices;
-	std::vector<std::string> names;
-	std::vector<float> confidences;
-} ImageClassificationResults; /**< structure ImageClassificationResults */
-
-typedef struct _ObjectDetectionResults {
-	int number_of_objects;
-	std::vector<int> indices;
-	std::vector<std::string> names;
-	std::vector<float> confidences;
-	std::vector<cv::Rect> locations;
-} ObjectDetectionResults; /**< structure ObjectDetectionResults */
-
-typedef struct _FaceDetectionResults {
-	int number_of_faces;
-	std::vector<float> confidences;
-	std::vector<cv::Rect> locations;
-} FaceDetectionResults; /**< structure FaceDetectionResults */
-
-typedef struct _FacialLandMarkDetectionResults {
-	int number_of_landmarks;
-	std::vector<cv::Point> locations;
-} FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */
-
-typedef struct _PoseLandmarkDetectionResults {
-	int number_of_landmarks;
-	std::vector<cv::Point2f> locations;
-	std::vector<float> score;
-} PoseLandmarkDetectionResults; /**< structure PoseLandmarkDetectionResults */
-
-namespace mediavision
-{
-namespace inference
-{
-	struct TensorInfo {
-		int width;
-		int height;
-		int dim;
-		int ch;
-	};
-
-	struct InferenceConfig {
-		/**
-		 * @brief Default constructor for the @ref InferenceConfig
-		 *
-		 * @since_tizen 5.0
-		 */
-		InferenceConfig();
-
-		std::string mConfigFilePath; /**< Path of a model configuration file */
-
-		std::string mWeightFilePath; /**< Path of a model weight file */
-
-		std::string mUserFilePath; /**< Path of model user file */
-
-		TensorInfo mTensorInfo; /**< Tensor information */
-
-		mv_inference_data_type_e mDataType; /**< Data type of a input tensor */
-
-		mv_inference_backend_type_e mBackedType; /**< Backed type of model files */
-
-		int mTargetTypes; /**< Target type to run inference */
-
-		double mConfidenceThresHold; /**< Confidence threshold value */
-
-		double mMeanValue; /**< The mean value for normalization */
-
-		double mStdValue; /**< The scale factor value for normalization */
-
-		int mMaxOutputNumbers;
-
-		std::vector<std::string> mInputLayerNames; /**< The input layer names */
-		std::vector<std::string> mOutputLayerNames; /**< The output layer names */
-	};
-
-	class Inference
-	{
-	public:
-		/**
-		 * @brief Creates an Inference class instance.
-		 *
-		 * @since_tizen 5.5
-		 */
-		Inference();
-
-		/**
-		 * @brief Destroys an Inference class instance including
-		 *        its all resources.
- * - * @since_tizen 5.5 - */ - ~Inference(); - - /** - * @brief Configure modelfiles - * - * @since_tizen 5.5 - */ - void ConfigureModelFiles(const std::string modelConfigFilePath, - const std::string modelWeightFilePath, - const std::string modelUserFilePath); - - /** - * @brief Configure input tensor information - * - * @since_tizen 5.5 - * @remarks deprecated Replayced by ConfigureInputInfo - */ - void ConfigureTensorInfo(int width, int height, int dim, int ch, - double stdValue, double meanValue); - - /** - * @brief Configure input information - * - * @since_tizen 6.0 - */ - void ConfigureInputInfo(int width, int height, int dim, int ch, - double stdValue, double meanValue, int dataType, - const std::vector names); - - void ConfigureOutputInfo(std::vector names); - - /** - * @brief Configure inference backend type. - * - * @since_tizen 6.0 - */ - int ConfigureBackendType(const mv_inference_backend_type_e backendType); - - /** - * @brief Configure a inference target device type such as CPU, GPU or NPU. (only one type can be set) - * @details Internally, a given device type will be converted to new type. - * This API is just used for backward compatibility. - * - * @since_tizen 6.0 (Deprecated) - */ - int ConfigureTargetTypes(const int targetType); - - /** - * @brief Configure inference target devices such as CPU, GPU or NPU. (one more types can be combined) - * - * @since_tizen 6.0 - */ - int ConfigureTargetDevices(const int targetDevices); - - /** - * @brief Configure the maximum number of inference results - * - * @since_tizen 5.5 - */ - void ConfigureOutput(const int maxOutputNumbers); - - /** - * @brief Configure the confidence threshold - * - * @since_tizen 5.5 - */ - void ConfigureThreshold(const double threshold); - - /** - * @brief Parses the metadata file path - * - * @since_tizen 6.5 - */ - int ParseMetadata(const std::string filePath); - - /** - * @brief Bind a backend engine - * @details Use this function to bind a backend engine for the inference. - * This creates a inference engine common class object, and loads a backend - * library which inferfaces with a Neural Network runtime such as TF Lite, - * OpenCV, ARMNN and so on. - * - * Ps. The created inference engine common object will be released and its - * corresponding backend library will be unbound when deconstructor - * of Inference class will be called. - * - * @since_tizen 6.0 - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - */ - int Bind(); - - /** - * @brief Set default configuration for the inference - * @details Use this function to set default configuration given in json file by user. - * - * Ps. this callback should be called after Bind callback. - * - * @since_tizen 6.0 - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - */ - int Prepare(); - - /** - * @brief Load model files - * @details Use this function to load given model files for the inference. - * - * Ps. this callback should be called after Prepare callback. 
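 *
 * A rough sketch of the call order this class expects (illustration only;
 * file paths and the target device value are placeholders):
 * @code
 * mediavision::inference::Inference inference;
 * inference.ConfigureModelFiles("model.cfg", "model.weights", "labels.txt");
 * inference.ConfigureTargetDevices(MV_INFERENCE_TARGET_DEVICE_CPU);
 * inference.Bind();    // bind a backend engine library
 * inference.Prepare(); // apply default/user configuration
 * inference.Load();    // load the model files
 * @endcode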
- * - * @since_tizen 6.0 - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - */ - int Load(); - - /** - * @brief Runs inference with a region of a given image - * @details Use this function to run forward pass with the given image. - * The given image is preprocessed and the region of the image is - * thrown to neural network. Then, the output tensor is returned. - * If roi is NULL, then full source will be analyzed. - * - * @since_tizen 5.5 - * @return @c true on success, otherwise a negative error value - */ - int Run(std::vector &mvSources, - std::vector &rects); - - /** - * @brief Gets that given engine is supported or not - * - * @since_tizen 5.5 - * @return @c true on success, otherwise a negative error value - */ - std::pair GetSupportedInferenceBackend(int backend); - - /** - * @brief Gets the ImageClassificationResults - * - * @since_tizen 5.5 - * @return @c true on success, otherwise a negative error value - */ - int GetClassficationResults(ImageClassificationResults *classificationResults); - - /** - * @brief Gets the ObjectDetectioResults - * - * @since_tizen 5.5 - * @return @c true on success, otherwise a negative error value - */ - int GetObjectDetectionResults(ObjectDetectionResults *detectionResults); - - /** - * @brief Gets the FaceDetectioResults - * - * @since_tizen 5.5 - * @return @c true on success, otherwise a negative error value - */ - int GetFaceDetectionResults(FaceDetectionResults *detectionResults); - - /** - * @brief Gets the FacialLandmarkDetectionResults - * - * @since_tizen 5.5 - * @return @c true on success, otherwise a negative error value - */ - int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results); - - /** - * @brief Gets the PoseLandmarkDetectionResults - * - * @since_tizen 6.0 - * @return @c true on success, otherwise a negative error value - */ - int GetPoseLandmarkDetectionResults(mv_inference_pose_result_h *detectionResults, - int width, int height); - - int GetResults(std::vector > *dimInfo, - std::vector *results); - - mv_engine_config_h GetEngineConfig(void) - { - return engine_config; - } - - void SetEngineConfig(mv_engine_config_h config) - { - engine_config = config; - } - - int GetTargetType() - { - return mConfig.mTargetTypes; - } - - private: - bool mCanRun; /**< The flag indicating ready to run Inference */ - InferenceConfig mConfig; - inference_engine_capacity mBackendCapacity; - std::map > mSupportedInferenceBackend; - cv::Size mInputSize; - int mCh; - int mDim; - double mDeviation; - double mMean; - double mThreshold; - int mOutputNumbers; - cv::Size mSourceSize; - cv::Mat mInputBuffer; - mv_engine_config_h engine_config; - InferenceEngineCommon *mBackend; - std::map mModelFormats; - std::vector mUserListName; - //std::map mInputTensorBuffers; - TensorBuffer mInputTensorBuffers; - inference_engine_layer_property mInputLayerProperty; - //std::map mOutputTensorBuffers; - TensorBuffer mOutputTensorBuffers; - inference_engine_layer_property mOutputLayerProperty; - - mv_inference_pose_s *mPoseResult; - - Metadata mMetadata; - PreProcess mPreProc; - PostProcess mPostProc; - - private: - void CheckSupportedInferenceBackend(); - int ConvertEngineErrorToVisionError(int error); - int ConvertTargetTypes(int given_types); - int ConvertToCv(int given_type); - inference_tensor_data_type_e ConvertToIE(int given_type); - int Preprocess(cv::Mat cvImg, cv::Mat cvDst, int data_type); - int 
PrepareTenosrBuffers(void); - void CleanupTensorBuffers(void); - int SetUserFile(std::string filename); - int FillOutputResult(tensor_t &outputData); - - }; - -} /* Inference */ -} /* MediaVision */ - -#endif /* __MEDIA_VISION_INFERENCE_H__ */ diff --git a/mv_inference/inference/include/InferenceIni.h b/mv_inference/inference/include/InferenceIni.h deleted file mode 100644 index 7a586148..00000000 --- a/mv_inference/inference/include/InferenceIni.h +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __MEDIA_VISION_INFERENCE_INI_H__ -#define __MEDIA_VISION_INFERENCE_INI_H__ - -#include -#include -#include - -namespace mediavision -{ -namespace inference -{ - class InferenceInI - { - public: - /** - * @brief Creates an Inference class instance. - * - * @since_tizen 5.5 - */ - InferenceInI(); - - /** - * @brief Destroys an Inference class instance including - * its all resources. - * - * @since_tizen 5.5 - */ - ~InferenceInI(); - - /** - * @brief Load() - * - * @since_tizen 5.5 - */ - int LoadInI(); - - /** - * @brief Unload() - * - * @since_tizen 5.5 - */ - void UnLoadInI(); - - std::vector GetSupportedInferenceEngines(); - - private: - std::vector mSupportedInferenceBackend; - std::string mIniDefaultPath; - std::string mDefaultBackend; - std::string mDelimeter; - }; - -} /* Inference */ -} /* MediaVision */ - -#endif /* __MEDIA_VISION_INFERENCE_H__ */ diff --git a/mv_inference/inference/include/InputMetadata.h b/mv_inference/inference/include/InputMetadata.h deleted file mode 100644 index 01da01cb..00000000 --- a/mv_inference/inference/include/InputMetadata.h +++ /dev/null @@ -1,127 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __MEDIA_VISION_INPUTMETADATA_H__ -#define __MEDIA_VISION_INPUTMETADATA_H__ - -#include -#include -#include - -#include -#include -#include - -/** - * @file InputMetadata.h - * @brief This file contains the metadata class definition which - * provides metadata of a model. 
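 *
 * The JSON root handed to InputMetadata::Parse() below can be obtained
 * with json-glib; a minimal sketch (the file name is a placeholder and
 * error handling is elided):
 * @code
 * JsonParser *parser = json_parser_new();
 * json_parser_load_from_file(parser, "model_meta.json", NULL);
 * JsonObject *root = json_node_get_object(json_parser_get_root(parser));
 * mediavision::inference::InputMetadata meta;
 * int ret = meta.Parse(root);
 * g_object_unref(parser);
 * @endcode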
- */ - -namespace mediavision -{ -namespace inference -{ - class Options - { - public: - class Normalization - { - public: - bool use; - std::vector mean; - std::vector std; - - Normalization() : use(false) {} - ~Normalization() = default; - }; - - class Quantization - { - public: - bool use; - std::vector scale; - std::vector zeropoint; - - Quantization() : use(false) {}; - ~Quantization() = default; - }; - - Normalization normalization; - Quantization quantization; - - Options() = default; - ~Options() = default; - }; - - class LayerInfo - { - public: - - std::string name; - std::vector dims; - mv_colorspace_e colorSpace; - mv_inference_data_type_e dataType; - inference_tensor_shape_type_e shapeType; // TODO: define mv_inference_shape_type_e - - LayerInfo() = default; - ~LayerInfo() = default; - - int GetWidth() const; - int GetHeight() const; - int GetChannel() const; - }; - - class InputMetadata - { - public: - bool parsed; - std::map layer; - std::map option; - - /** - * @brief Creates an InputMetadata class instance. - * - * @since_tizen 6.5 - */ - InputMetadata() : parsed(false) {}; - - /** - * @brief Destroys an InputMetadata class instance including - * its all resources. - * - * @since_tizen 6.5 - */ - ~InputMetadata() = default; - - /** - * @brief Parses an InputMetadata - * - * @since_tizen 6.5 - */ - int Parse(JsonObject *root); - - private: - int GetTensorInfo(JsonObject* root); - int GetPreProcess(JsonObject* root); - mv_colorspace_e ConvertTypeToMD(const std::string& type); - - }; - -} /* Inference */ -} /* MediaVision */ - -#endif /* __MEDIA_VISION_INPUTMETADATA_H__ */ diff --git a/mv_inference/inference/include/Joint.h b/mv_inference/inference/include/Joint.h deleted file mode 100644 index d28a70c2..00000000 --- a/mv_inference/inference/include/Joint.h +++ /dev/null @@ -1,232 +0,0 @@ -#ifndef __MEDIA_VISION_JOINT_H__ -#define __MEDIA_VISION_JOINT_H__ - -#include -#include -#include -#include - -namespace mediavision -{ -namespace inference -{ - - /** Class created for storing single joint data from bvh file */ - class Joint { - public: - /** A struct that keep offset of joint in relation to parent */ - struct Offset { - float x; - float y; - float z; - }; - - /** A enumeration type useful for set order of channels for every joint */ - enum class Channel { - XPOSITION, - YPOSITION, - ZPOSITION, - ZROTATION, - XROTATION, - YROTATION - }; - - /** A string names for each channel */ - const std::vector channel_name_str = { - "XPOSITION", - "YPOSITION", - "ZPOSITION", - "ZROTATION", - "XROTATION", - "YROTATION" - }; - - /** Adds single frame motion data - * @param data The motion data to be added - */ - void add_frame_motion_data(const std::vector & data) { - channel_data_.push_back(data); - } - - /** Gets the parent joint of this joint - * @return The parent joint - */ - std::shared_ptr parent() const { return parent_; } - - /** Gets the name of this joint - * @return The joint's name - */ - std::string name() const { return name_; } - - /** Gets the offset of this joint - * @return The joint's offset - */ - Offset offset() const { return offset_; } - - /** Gets the channels order of this joint - * @return The joint's channels order - */ - std::vector channels_order() const { - return channels_order_; - } - - /** Gets the all children joints of this joint - * @return The joint's children - */ - std::vector > children() const { - return children_; - } - - /** Gets the channels data of this joint for all frames - * @return The joint's channel data - */ - const std::vector >& 
channel_data() const { - return channel_data_; - } - - /** Gets the channel data of this joint for selected frame - * @param frame The frame for which channel data will be returned - * @return The joint's channel data for selected frame - */ - const std::vector & channel_data(unsigned frame) const { - return channel_data_[frame]; - } - - /** Gets the channel data of this joint for selected frame and channel - * @param frame The frame for which channel data will be returned - * @param channel_num The number of channel which data will be returned - * @return The joint's channel data for selected frame and channel - */ - float channel_data(unsigned frame, unsigned channel_num) const { - return channel_data_[frame][channel_num]; - } - - /** Gets the local transformation matrix for this joint for all frames - * @return The joint's local transformation matrix - */ - std::vector ltm() const { - return ltm_; - } - - /** Gets the local transformation matrix for this joint for selected frame - * @param frame The frame for which ltm will be returned - * @return The joint's local transformation matrix for selected frame - */ - cv::Mat ltm(unsigned frame) const { - return ltm_[frame]; - } - - /** Gets the position for this joint for all frames - * @return The joint's position - */ - std::vector pos() const { - return pos_; - } - - /** Gets the position for this joint for selected frame - * @param frame The frame for which ltm will be returned - * @return The joint's position for selected frame - */ - cv::Vec3f pos(unsigned frame) const { - return pos_[frame]; - } - - /** Gets the number of channels of this joint - * @return The joint's channels number - */ - unsigned num_channels() const { return channels_order_.size(); } - - /** Sets the this joint parent joint - * @param arg The parent joint of this joint - */ - void set_parent(const std::shared_ptr arg) { parent_ = arg; } - - /** Sets the this joint name - * @param arg The name of this joint - */ - void set_name(const std::string arg) { name_ = arg; } - - /** Sets the this joint offset - * @param arg The offset of this joint - */ - void set_offset(const Offset arg) { offset_ = arg; } - - /** Sets the this joint channels order - * @param arg The channels order of this joint - */ - void set_channels_order(const std::vector & arg) { - channels_order_ = arg; - } - - /** Sets the this joint children - * @param arg The children of this joint - */ - void set_children(const std::vector >& arg) { - children_ = arg; - } - - /** Sets the this joint channels data - * @param arg The channels data of this joint - */ - void set_channel_data(const std::vector >& arg) { - channel_data_ = arg; - } - - /** Sets local transformation matrix for selected frame - * @param matrix The local transformation matrix to be set - * @param frame The number of frame for which you want set ltm. As - * default it is set to 0. - */ - void set_ltm(const cv::Mat matrix, unsigned frame = 0) { - if (frame > 0 && frame < ltm_.size()) - ltm_[frame] = matrix; - else - ltm_.push_back(matrix); - } - - /** Sets local transformation matrix for selected frame - * @param pos The position of joint in selected frame to be set - * @param frame The number of frame for which you want set position. As - * default it is set to 0. 
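 *
 * For reference, given a std::shared_ptr<Joint> joint obtained from a
 * parsed Bvh object, reading per-frame data back looks like this
 * (a sketch; frame 0 is assumed to exist):
 * @code
 * const std::vector<float>& frame0 = joint->channel_data(0);
 * cv::Vec3f position = joint->pos(0);
 * unsigned channels = joint->num_channels();
 * @endcode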
- */ - void set_pos(const cv::Vec3f pos, unsigned frame = 0) { - if (frame > 0 && frame < pos_.size()) - pos_[frame] = pos; - else - pos_.push_back(pos); - } - - /** Gets channels name of this joint - * @return The joint's channels name - */ - const std::vector get_channels_name() const { - std::vector channel_names; - - for (int i = 0; i < channels_order_.size(); i++) - channel_names.push_back(channel_name_str[static_cast( - channels_order_[i])]); - - return channel_names; - } - - private: - /** Parent joint in file hierarchy */ - std::shared_ptr parent_; - std::string name_; - Offset offset_; - /** Order of joint's input channels */ - std::vector channels_order_; - /** Pointers to joints that are children of this in hierarchy */ - std::vector > children_; - /** Structure for keep joint's channel's data. - * Each vector keep data for one channel. - */ - std::vector > channel_data_; - /** Local transformation matrix for each frame */ - std::vector ltm_; - /** Vector x, y, z of joint position for each frame */ - std::vector pos_; - }; -} -} // namespace -#endif // __MEDIA_VISION_JOINT_H__ diff --git a/mv_inference/inference/include/Metadata.h b/mv_inference/inference/include/Metadata.h deleted file mode 100644 index ecf9ef6a..00000000 --- a/mv_inference/inference/include/Metadata.h +++ /dev/null @@ -1,90 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __MEDIA_VISION_METADATA_H__ -#define __MEDIA_VISION_METADATA_H__ - -#include -#include - -#include "mv_common.h" -#include "mv_inference_private.h" -#include -#include "InputMetadata.h" -#include "OutputMetadata.h" -#include - -/** - * @file Metadata.h - * @brief This file contains the metadata class definition which - * provides metadata of a model. - */ - -namespace mediavision -{ -namespace inference -{ - class Metadata - { - public: - /** - * @brief Creates an Metadata class instance. - * - * @since_tizen 6.5 - */ - Metadata() = default; - - /** - * @brief Destroys an Metadata class instance including - * its all resources. 
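 *
 * Typical flow for this class (a sketch; the metadata file name is a
 * placeholder):
 * @code
 * mediavision::inference::Metadata metadata;
 * if (metadata.Init("model_meta.json") == MEDIA_VISION_ERROR_NONE &&
 *     metadata.Parse() == MEDIA_VISION_ERROR_NONE) {
 *     InputMetadata& in = metadata.GetInputMeta();
 *     OutputMetadata& out = metadata.GetOutputMeta();
 * }
 * @endcode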
- * - * @since_tizen 6.5 - */ - ~Metadata() = default; - - /** - * @brief Initializes an Metadata class - * - * @since_tizen 6.5 - */ - int Init(const std::string& filename); - - /** - * @brief Parses a metafile and set values to InputMetadata - * and OutputMetadata - * - * @since_tizen 6.5 - */ - int Parse(); - - InputMetadata& GetInputMeta(); - OutputMetadata& GetOutputMeta(); - - private: - int ParseInputMeta(JsonObject *object); - int ParseOutputMeta(JsonObject *object); - - private: - std::string mMetafile; - - InputMetadata mInputMeta; - OutputMetadata mOutputMeta; - }; - -} /* Inference */ -} /* MediaVision */ - -#endif /* __MEDIA_VISION_METADATA_H__ */ diff --git a/mv_inference/inference/include/ObjectDecoder.h b/mv_inference/inference/include/ObjectDecoder.h deleted file mode 100755 index f5324f22..00000000 --- a/mv_inference/inference/include/ObjectDecoder.h +++ /dev/null @@ -1,80 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __MEDIA_VISION_OBJECTDECODER_H__ -#define __MEDIA_VISION_OBJECTDECODER_H__ - -#include -#include -#include -#include -#include - -#include "TensorBuffer.h" -#include "OutputMetadata.h" -#include "PostProcess.h" - -/** - * @file ObjectDecoder.h - * @brief This file contains the ObjectDecoder class definition which - * provides object decoder. - */ - -namespace mediavision -{ -namespace inference -{ - class ObjectDecoder - { - private: - TensorBuffer mTensorBuffer; - OutputMetadata mMeta; - int mBoxOffset; - int mNumberOfOjects; - - ScoreInfo& mScoreInfo; - BoxInfo& mBoxInfo; - - float mScaleW; - float mScaleH; - - Boxes mResultBoxes; - - float decodeScore(int idx); - Box decodeBox(int idx, float score, int label = -1); - Box decodeBoxWithAnchor(int idx, int anchorIdx, float score, cv::Rect2f& anchor); - - public: - ObjectDecoder(TensorBuffer& buffer, OutputMetadata& metaData, - int boxOffset, float scaleW, float scaleH, int numberOfObjects = 0) : - mTensorBuffer(buffer), mMeta(metaData), - mBoxOffset(boxOffset), mNumberOfOjects(numberOfObjects), - mScoreInfo(mMeta.GetScore()), mBoxInfo(mMeta.GetBox()), - mScaleW(scaleW), mScaleH(scaleH), - mResultBoxes() { - }; - - ~ObjectDecoder() = default; - - int init(); - int decode(); - Boxes& getObjectAll(); - }; - -} /* Inference */ -} /* MediaVision */ - -#endif /* __MEDIA_VISION_OBJECTDECODER_H__ */ diff --git a/mv_inference/inference/include/OutputMetadata.h b/mv_inference/inference/include/OutputMetadata.h deleted file mode 100644 index f311ee41..00000000 --- a/mv_inference/inference/include/OutputMetadata.h +++ /dev/null @@ -1,255 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __MEDIA_VISION_OUTPUTMETADATA_H__ -#define __MEDIA_VISION_OUTPUTMETADATA_H__ - -#include -#include -#include -#include - -#include -#include -#include - -/** - * @file OutputMetadata.h - * @brief This file contains the metadata class definition which - * provides metadata of a model. - */ - -namespace mediavision -{ -namespace inference -{ - class DimInfo - { - private: - std::vector dims; - public: - std::vector GetValidIndexAll() const; - void SetValidIndex(int index); - }; - - class DeQuantization - { - private: - double scale; - double zeropoint; - public: - DeQuantization(double s, double z) : scale(s), zeropoint(z) {}; - ~DeQuantization() = default; - - double GetScale() { return scale; } - double GetZeroPoint() { return zeropoint; } - }; - - class ScoreInfo - { - private: - std::string name; - DimInfo dimInfo; - double threshold; - int type; - int topNumber; - std::shared_ptr deQuantization; - - public: - ScoreInfo() = default; - ~ScoreInfo() = default; - - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - double GetThresHold() { return threshold; } - int GetType() { return type; } - int GetTopNumber() { return topNumber; } - std::shared_ptr GetDeQuant() { return deQuantization; } - - int ParseScore(JsonObject *root); - }; - - class BoxInfo - { - public: - class DecodeInfo { - public: - class AnchorParam { - public: - int mode; /**< 0: generate anchor, 1:load pre-anchor*/ - int numLayers; - float minScale; - float maxScale; - int inputSizeHeight; - int inputSizeWidth; - float anchorOffsetX; - float anchorOffsetY; - std::vector strides; - std::vector aspectRatios; - bool isReduceBoxedInLowestLayer; - float interpolatedScaleAspectRatio; - bool isFixedAnchorSize; - bool isExponentialBoxScale; - float xScale; - float yScale; - float wScale; - float hScale; - - AnchorParam() = default; - ~AnchorParam() = default; - }; - - class NMSParam { - public: - int mode; /**< 0: IOU */ - float threshold; - - NMSParam() : mode(-1), threshold(0.2f) {}; - ~NMSParam() = default; - }; - - private: - AnchorParam anchorParam; - std::vector anchorBoxes; - NMSParam nmsParam; - - public: - DecodeInfo() = default; - ~DecodeInfo() = default; - std::vector& GetAnchorBoxAll(); - bool IsAnchorBoxEmpty(); - void AddAnchorBox(cv::Rect2f& ahcnor); - void ClearAnchorBox(); - - // Anchor param - int ParseAnchorParam(JsonObject *root); - int GenerateAnchor(); - bool IsFixedAnchorSize(); - bool IsExponentialBoxScale(); - float GetAnchorXscale(); - float GetAnchorYscale(); - float GetAnchorWscale(); - float GetAnchorHscale(); - float CalculateScale(float min, float max, int index, int maxStride); - - // Nms param - int ParseNms(JsonObject *root); - int GetNmsMode(); - float GetNmsThreshold(); - }; - - private: - std::string name; - DimInfo dimInfo; - int type; // 0:LTRB, 1: CxCyWH - std::vector order; // Order based on box type - int coordinate; // 0: ratio, 1: pixel - int decodingType; // 0: post-op, 1: achorbox(ssd), 2:yolo(?) 
- DecodeInfo decodingInfo; - - public: - BoxInfo() = default; - ~BoxInfo() = default; - - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - int GetType() { return type; } - std::vector GetOrder() { return order; } - int GetCoordinate() { return coordinate; } - int GetDecodingType() { return decodingType; } - DecodeInfo& GetDecodeInfo() {return decodingInfo; } - - int ParseBox(JsonObject *root); - }; - - class Label - { - private: - std::string name; - DimInfo dimInfo; - - public: - Label() = default; - ~Label() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - - int ParseLabel(JsonObject *root); - }; - - class Number - { - private: - std::string name; - DimInfo dimInfo; - - public: - Number() = default; - ~Number() = default; - std::string GetName() { return name; } - DimInfo GetDimInfo() { return dimInfo; } - - int ParseNumber(JsonObject *root); - }; - - class OutputMetadata - { - private: - bool parsed; - ScoreInfo score; - BoxInfo box; - Label label; - Number number; - - int ParseScore(JsonObject *root); - int ParseBox(JsonObject *root); - int ParseLabel(JsonObject *root); - int ParseNumber(JsonObject *root); - int ParseBoxDecodeInfo(JsonObject *root); - - public: - /** - * @brief Creates an OutputMetadata class instance. - * - * @since_tizen 6.5 - */ - OutputMetadata() : parsed(false) {}; - - /** - * @brief Destroys an OutputMetadata class instance including - * its all resources. - * - * @since_tizen 6.5 - */ - ~OutputMetadata() = default; - - /** @brief Parses an OutputMetadata - * - * @since_tizen 6.5 - */ - int Parse(JsonObject *root); - - bool IsParsed(); - ScoreInfo& GetScore(); - BoxInfo& GetBox(); - Label& GetLabel(); - Number& GetNumber(); - }; - -} /* Inference */ -} /* MediaVision */ - -#endif /* __MEDIA_VISION_OUTPUTMETADATA_H__ */ diff --git a/mv_inference/inference/include/PostProcess.h b/mv_inference/inference/include/PostProcess.h deleted file mode 100644 index fbf64be1..00000000 --- a/mv_inference/inference/include/PostProcess.h +++ /dev/null @@ -1,98 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __MEDIA_VISION_POSTPROCESS_H__ -#define __MEDIA_VISION_POSTPROCESS_H__ - -#include -#include -#include - -#include "mv_common.h" -#include "OutputMetadata.h" - -#include -#include - - -/** - * @file PostProcess.h - * @brief This file contains the PostProcess class definition which - * provides PostProcess after running inference. - */ - -/** - * @brief Box structure. - * @details Box structure includes index, score, location. 
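 *
 * (A sketch of constructing a Box and feeding it to PostProcess::Nms(),
 * which is declared later in this header; the values and the 0.2f
 * threshold are examples only:)
 * @code
 * Box box = { 1, 0.87f, cv::Rect2f(0.1f, 0.2f, 0.3f, 0.4f) };
 * BoxesList candidates = { { box } };
 * Boxes kept;
 * mediavision::inference::PostProcess post;
 * post.Nms(candidates, 0, 0.2f, kept); // mode 0: IoU
 * @endcode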
- * - */ -typedef struct _Box { - int index; /**< index of box belonging to a category */ - float score; /**< score of box belonging to the index */ - cv::Rect2f location; /**< location of a box */ -} Box; - -using Boxes = std::vector; -using BoxesList = std::vector; - -namespace mediavision -{ -namespace inference -{ - class PostProcess - { - public: - /** - * @brief Creates an PostProcess class instance. - * - * @since_tizen 6.5 - */ - PostProcess() : mMaxScoreSize(3) {}; - - /** - * @brief Destroys an PostProcess class instance including - * its all resources. - * - * @since_tizen 6.5 - */ - ~PostProcess() = default; - - /** - * @brief Calculates sigmoid. - * - * @since_tizen 6.5 - */ - static float sigmoid(float value); - static float dequant(float value, float scale, float zeropoint); - - int ScoreClear(int size); - int ScorePush(float value, int index); - int ScorePop(std::vector>& top); - int Nms(BoxesList& boxeslist, int mode, float threshold, Boxes& nmsboxes); - - private: - std::priority_queue, - std::vector>, - std::greater>> mScore; - private: - int mMaxScoreSize; - - }; - -} /* Inference */ -} /* MediaVision */ - -#endif /* __MEDIA_VISION_POSTPROCESS_H__ */ diff --git a/mv_inference/inference/include/Posture.h b/mv_inference/inference/include/Posture.h deleted file mode 100644 index 4c67fdcd..00000000 --- a/mv_inference/inference/include/Posture.h +++ /dev/null @@ -1,95 +0,0 @@ -/** - * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __MEDIA_VISION_POSE_H__ -#define __MEDIA_VISION_POSE_H__ - -#include -#include - -#include "mv_common.h" -#include "Inference.h" -#include "Bvh.h" -#include "BvhParser.h" -#include -#include -#include - -/** - * @file Pose.h - * @brief This file contains the pose class definition - */ - -namespace mediavision -{ -namespace inference -{ - class Posture - { - public: - /** - * @brief Creates an Posture class instance. - * - * @since_tizen 6.0 - */ - Posture(); - - /** - * @brief Destroys an Posture class instance including - * its all resources. 
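 *
 * A usage sketch for this class (paths are placeholders; the element type
 * of the action container is an assumption here, since the compare()
 * declaration below lost its template arguments):
 * @code
 * mediavision::inference::Posture posture;
 * posture.setPoseFromFile("/path/to/motion.bvh", "/path/to/mapping.txt");
 * std::vector<std::pair<bool, cv::Point>> action; // detected landmarks
 * float score = 0.0f;
 * posture.compare(HUMAN_POSE_MAX_PARTS, action, &score); // part count assumed
 * @endcode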
- * - * @since_tizen 6.0 - */ - ~Posture(); - - /** - * @brief Sets file path - * - * @since_tizen 6.0 - */ - int setPoseFromFile(const std::string motionCaptureFilePath, - const std::string motionMappingFilePath); - - /** - * @brief Compares a pose for @a part and returns score - * - * @since_tizen 6.0 - */ - int compare(int parts, std::vector> action, - float* score); - - private: - cv::Vec2f getUnitVectors(cv::Point point1, cv::Point point2); - int getParts(int parts, - std::vector>& pose, - std::vector>>& posePart); - float getSimilarity(int parts, - std::vector>>& posePart, - std::vector>>& actionPart); - float cosineSimilarity(std::vector vec1, std::vector vec2, int size); - - private: - BvhParser mBvhParser; - Bvh mBvh; - std::map mMotionToPoseMap; /**< name, index */ - std::vector> mPose; - std::vector>> mPoseParts; - }; - -} /* Inference */ -} /* MediaVision */ - -#endif /* __MEDIA_VISION_INFERENCE_H__ */ diff --git a/mv_inference/inference/include/PreProcess.h b/mv_inference/inference/include/PreProcess.h deleted file mode 100644 index f4c002bb..00000000 --- a/mv_inference/inference/include/PreProcess.h +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __MEDIA_VISION_PREPROCESS_H__ -#define __MEDIA_VISION_PREPORCESS_H__ - -#include -#include - -#include "mv_common.h" -#include "InputMetadata.h" - -#include -#include - - -/** - * @file PreProcess.h - * @brief This file contains the PreProcess class definition which - * provides PreProcess before running inference. - */ - -namespace mediavision -{ -namespace inference -{ - class PreProcess - { - public: - /** - * @brief Creates an PreProcess class instance. - * - * @since_tizen 6.5 - */ - PreProcess() = default; - - /** - * @brief Destroys an PreProcess class instance including - * its all resources. - * - * @since_tizen 6.5 - */ - ~PreProcess() = default; - - /** - * @brief Runs PreProcess with layerInfo and options - * - * @since_tizen 6.5 - */ - int Run(cv::Mat& source, const int colorSpace, const int dataType, const LayerInfo& layerInfo, - const Options& options, void* buffer); - - private: - int Resize(cv::Mat& source, cv::Mat& dest, cv::Size size); - int ColorConvert(cv::Mat& source, cv::Mat& dest, int sType, int dType); - int Normalize(cv::Mat& source, cv::Mat& dest, - const std::vector& mean, const std::vector& std); - - }; - -} /* Inference */ -} /* MediaVision */ - -#endif /* __MEDIA_VISION_PREPROCESS_H__ */ diff --git a/mv_inference/inference/include/TensorBuffer.h b/mv_inference/inference/include/TensorBuffer.h deleted file mode 100644 index 9054ec7b..00000000 --- a/mv_inference/inference/include/TensorBuffer.h +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __MEDIA_VISION_TENSORBUFFER_H__ -#define __MEDIA_VISION_TENSORBUFFER_H__ - -#include -#include -#include -#include "mv_common.h" -#include "mv_inference_private.h" -#include -#include - - -/** - * @file TensorBuffer.h - * @brief This file contains the tensor buffer class definition which - * provides name and inference_engine_tensor_buffer. - */ - -using IETensorBuffer = std::map; -namespace mediavision -{ -namespace inference -{ - class TensorBuffer - { - private: - IETensorBuffer mTensorBuffer; - - public: - TensorBuffer() = default; - ~TensorBuffer() = default; - - bool empty(); - bool exist(std::string name); - void clear(); - size_t size(); - - IETensorBuffer& getAllTensorBuffer(); - inference_engine_tensor_buffer* getTensorBuffer(std::string name); - bool setTensorBuffer(std::string name, inference_engine_tensor_buffer& buffer); - - template - T getValue(std::string name, int idx); - }; -} /* Inference */ -} /* MediaVision */ - -#endif /* __MEDIA_VISION_TENSOR_BUFFER_H__ */ diff --git a/mv_inference/inference/include/mv_inference_open.h b/mv_inference/inference/include/mv_inference_open.h deleted file mode 100644 index 7f22ac98..00000000 --- a/mv_inference/inference/include/mv_inference_open.h +++ /dev/null @@ -1,706 +0,0 @@ -/** - * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __MEDIA_VISION_INFERENCE_OPEN_H__ -#define __MEDIA_VISION_INFERENCE_OPEN_H__ - -#include -#include -#include - -#ifdef __cplusplus -extern "C" -{ -#endif /* __cplusplus */ - - /** - * @file mv_inference_open.h - * @brief This file contains the Media Vision Inference Open API. - */ - - /*************/ - /* Inference */ - /*************/ - - mv_engine_config_h mv_inference_get_engine_config(mv_inference_h infer); - - /** - * @brief Create infernce handle. - * @details Use this function to create an inference handle. After creation - * the inference handle has to be prepared with - * @ref mv_inference_prepare() function to prepare an inference. 
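 *
 * (A sketch of the create/prepare/destroy pairing this API expects;
 * error checks elided:)
 * @code
 * mv_inference_h infer = NULL;
 * mv_inference_create_open(&infer);
 * // ... configure via the mv_inference_configure_*_open() setters, then:
 * mv_inference_prepare_open(infer);
 * // ... run inference ...
 * mv_inference_destroy_open(infer);
 * @endcode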
- * - * @since_tizen 5.5 - * - * @param [out] infer The handle to the inference to be created - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * - * @post Release @a infer by using - * @ref mv_inference_destroy() function when it is not needed - * anymore - * - * @see mv_inference_destroy_open() - * @see mv_inference_prepare_open() - */ - int mv_inference_create_open(mv_inference_h *infer); - - /** - * @brief Destroy inference handle and releases all its resources. - * - * @since_tizen 5.5 - * - * @param [in] infer The handle to the inference to be destroyed - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * - * @pre Create an inference handle by using @ref mv_inference_create_open() - * - * @see mv_inference_create_open() - */ - int mv_inference_destroy_open(mv_inference_h infer); - - /** - * @brief Configure the inference model data to inference handle - * - * @since_tizen 5.5 - * - * @param [in] infer The handle to the inference - * @param [in] engine_config The handle to the configuration of - * engine. - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - */ - int mv_inference_configure_model_open(mv_inference_h infer, - mv_engine_config_h engine_config); - - /** - * @brief Configure the tensor information to the inference handle - * - * @since_tizen 5.5 - * @remarks deprecated Replaced by mv_inference_configure_input_info - * - * @param [in] infer The handle to the inference - * @param [in] engine_config The handle to the configuration of - * engine. - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - */ - - int - mv_inference_configure_input_info_open(mv_inference_h infer, - mv_engine_config_h engine_config); - - /** - * @brief Configure the input information to the inference handle - * - * @since_tizen 6.0 - * - * @param [in] infer The handle to the inference - * @param [in] engine_config The handle to the configuration of - * engine. 
- * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - */ - int - mv_inference_configure_input_info_open(mv_inference_h infer, - mv_engine_config_h engine_config); - - /** - * @brief Configure the backend to the inference handle - * - * @since_tizen 5.5 - * - * @param [in] infer The handle to the inference - * @param [in] engine_config The handle to the configuration of - * engine. - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - */ - int mv_inference_configure_engine_open(mv_inference_h infer, - mv_engine_config_h engine_config); - - /** - * @brief Configure the number of output to the inference handle - * - * @since_tizen 5.5 - * @remarks deprecated Replaced by mv_inference_configure_post_process_info_open - * - * @param [in] infer The handle to the inference - * @param [in] engine_config The handle to the configuration of - * engine. - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - */ - int mv_inference_configure_output_open(mv_inference_h infer, - mv_engine_config_h engine_config); - - /** - * @brief Configure the confidence threshold value to the inference handle - * - * @since_tizen 5.5 - * @remarks deprecated Replaced by mv_inference_configure_post_process_info_open - * - * @param [in] infer The handle to the inference - * @param [in] engine_config The handle to the configuration of - * engine. - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - */ - int mv_inference_configure_confidence_threshold_open( - mv_inference_h infer, mv_engine_config_h engine_config); - - /** - * @brief Configure the post process infomation to the inference handle - * - * @since_tizen 6.0 - * - * @param [in] infer The handle to the inference - * @param [in] engine_config The handle to the configuration of - * engine. 
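- *
- * For example, the confidence threshold travels through the same engine
- * configuration handle (the 0.6 value is illustrative; the key is the
- * public MV_INFERENCE_CONFIDENCE_THRESHOLD attribute):
- * @code
- * mv_engine_config_set_double_attribute(engine_config,
- *         MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.6);
- * mv_inference_configure_post_process_info_open(infer, engine_config);
- * @endcode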
- * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - */ - int mv_inference_configure_post_process_info_open( - mv_inference_h infer, mv_engine_config_h engine_config); - - /** - * @brief Configure the set of output node names to the inference handle - * - * @since_tizen 5.5 - * @remarks deprecated Replaced by mv_inference_configure_output_info_open - * - * @param [in] infer The handle to the inference - * @param [in] engine_config The handle to the configuration of - * engine. - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - */ - int mv_inference_configure_output_node_names_open( - mv_inference_h infer, mv_engine_config_h engine_config); - - /** - * @brief Configure the output information to the inference handle - * - * @since_tizen 6.0 - * - * @param [in] infer The handle to the inference - * @param [in] engine_config The handle to the configuration of - * engine. - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - */ - int - mv_inference_configure_output_info_open(mv_inference_h infer, - mv_engine_config_h engine_config); - - /** - * @brief Prepare inference. - * @details Use this function to prepare inference based on - * the configured network. - * - * @since_tizen 5.5 - * - * @param [in] infer The handle to the inference - * - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data - * in @a engine_config - * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - */ - int mv_inference_prepare_open(mv_inference_h infer); - - /** - * @brief Traverses the list of supported engines for inference. - * @details Using this function the supported engines can be obtained. - * The names can be used with mv_engine_config_h related - * getters and setters to get/set MV_INFERENCE_BACKEND_TYPE attribute - * value. 
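- *
- *          A callback sketch (assuming the public
- *          mv_inference_supported_engine_cb signature):
- * @code
- * static bool _engine_cb(const char *engine, bool supported, void *user_data)
- * {
- *     printf("%s : %s\n", engine, supported ? "supported" : "unsupported");
- *     return true; // keep iterating
- * }
- * @endcode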
- * - * @since_tizen 5.5 - * @param [in] infer The handle to the inference - * @param [in] callback The iteration callback function - * @param [in] user_data The user data to be passed to the callback function - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - * - * @pre @a engine_cfg must be created - * - * @see mv_engine_config_set_string_attribute() - * @see mv_engine_config_get_string_attribute() - */ - int mv_inference_foreach_supported_engine_open( - mv_inference_h infer, mv_inference_supported_engine_cb callback, - void *user_data); - - /** - * @brief Performs image classification on the @a source - * @details Use this function to launch image classification. - * Each time when mv_inference_image_classify is - * called, @a classified_cb will receive classes - * which the media source may belong to. - * - * @since_tizen 5.5 - * - * @param [in] source The handle to the source of the media - * @param [in] infer The handle to the inference - * @param [in] roi Rectangular box bounding the region-of-interest on the - * @a source. If NULL, then full source will be - * analyzed. - * @param [in] classified_cb The callback which will be called for - * classification on media source. - * This callback will receive classification results. - * @param [in] user_data The user data passed from the code where - * @ref mv_inference_image_classify_open() is invoked. This data will - * be accessible from @a classified_cb callback. - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace - * isn't supported - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - * - * @pre Create a source handle by calling @ref mv_create_source() - * @pre Create an inference handle by calling @ref mv_inference_create() - * @pre Configure an inference handle by calling @ref mv_inference_configure() - * @pre Prepare an inference by calling @ref mv_inference_prepare() - * @post @a classified_cb will be called to process classification results - * - * @see mv_inference_image_classified_cb - */ - int mv_inference_image_classify_open( - mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, - mv_inference_image_classified_cb classified_cb, void *user_data); - - /** - * @brief Performs object detection on the @a source - * @details Use this function to launch object detection. - * Each time when mv_inference_object_detection is - * called, @a detected_cb will receive a list of objects and their locations - * on the media source. - * - * @since_tizen 5.5 - * - * @param [in] source The handle to the source of the media - * @param [in] infer The handle to the inference - * @param [in] detected_cb The callback which will be called for - * detecting objects on media source. - * This callback will receive the detection results. - * @param [in] user_data The user data passed from the code where - * @ref mv_inference_object_detect() is invoked. This data will - * be accessible from @a detected_cb callback. 
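- *
- * A detection callback sketch (assuming the public
- * mv_inference_object_detected_cb signature):
- * @code
- * static void _detected_cb(mv_source_h source, const int number_of_objects,
- *         const int *indices, const char **names, const float *confidences,
- *         const mv_rectangle_s *locations, void *user_data)
- * {
- *     for (int i = 0; i < number_of_objects; i++)
- *         printf("%s (%.2f)\n", names[i], confidences[i]);
- * }
- * @endcode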
- * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace - * isn't supported - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - * - * @pre Create a source handle by calling @ref mv_create_source() - * @pre Create an inference handle by calling @ref mv_inference_create() - * @pre Configure an inference handle by calling @ref mv_inference_configure() - * @pre Prepare an inference by calling @ref mv_inference_prepare() - * @post @a detected_cb will be called to process detection results - * - * @see mv_inference_object_detected_cb - */ - int - mv_inference_object_detect_open(mv_source_h source, mv_inference_h infer, - mv_inference_object_detected_cb detected_cb, - void *user_data); - - /** - * @brief Performs face detection on the @a source - * @details Use this function to launch face detection. - * Each time when mv_inference_face_detection is - * called, @a detected_cb will receive a list of faces and their locations - * on the media source. - * - * @since_tizen 5.5 - * - * @param [in] source The handle to the source of the media - * @param [in] infer The handle to the inference - * @param [in] detected_cb The callback which will be called for - * detecting faces on media source. - * This callback will receive the detection results. - * @param [in] user_data The user data passed from the code where - * @ref mv_inference_face_detect() is invoked. This data will - * be accessible from @a detected_cb callback. - * @return @c 0 on success, otherwise a negative error value - * @retval #MEDIA_VISION_ERROR_NONE Successful - * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter - * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace - * isn't supported - * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory - * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported - * - * @pre Create a source handle by calling @ref mv_create_source() - * @pre Create an inference handle by calling @ref mv_inference_create() - * @pre Configure an inference handle by calling @ref mv_inference_configure() - * @pre Prepare an inference by calling @ref mv_inference_prepare() - * @post @a detected_cb will be called to process detection results - * - * @see mv_inference_face_detected_cb - */ - int mv_inference_face_detect_open(mv_source_h source, mv_inference_h infer, - mv_inference_face_detected_cb detected_cb, - void *user_data); - - /** - * @brief Performs facial landmarks detection on the @a source - * @details Use this function to launch facial landmark detection. - * Each time when mv_inference_facial_landmark_detect() is - * called, @a detected_cb will receive a list facial landmark's locations - * on the media source. - * - * @since_tizen 5.5 - * - * @param [in] source The handle to the source of the media - * @param [in] infer The handle to the inference - * @param[in] roi Rectangular box bounding face image on the - * @a source. If NULL, then full source will be - * analyzed. - * @param [in] detected_cb The callback which will be called for - * detecting facial landmark on media source. - * This callback will receive the detection results. 
- * @param [in] user_data The user data passed from the code where
- *                       @ref mv_inference_facial_landmark_detect() is invoked.
- *                       This data will be accessible from @a detected_cb callback.
- *
- * @return @c 0 on success, otherwise a negative error value
- * @retval #MEDIA_VISION_ERROR_NONE Successful
- * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
- *                                                  isn't supported
- * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
- *
- * @pre Create a source handle by calling @ref mv_create_source()
- * @pre Create an inference handle by calling @ref mv_inference_create()
- * @pre Configure an inference handle by calling @ref mv_inference_configure()
- * @pre Prepare an inference by calling @ref mv_inference_prepare()
- * @post @a detected_cb will be called to process detection results
- *
- * @see mv_inference_facial_landmark_detected_cb
- */
- int mv_inference_facial_landmark_detect_open(
-         mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi,
-         mv_inference_facial_landmark_detected_cb detected_cb,
-         void *user_data);
-
- /**
- * @brief Performs pose landmark detection on the @a source.
- * @details Use this function to launch pose landmark detection.
- *          Each time mv_inference_pose_landmark_detect_open() is
- *          called, @a detected_cb will receive a list of pose landmarks'
- *          locations in the media source.
- *
- * @since_tizen 6.0
- * @remarks This function is synchronous and may take considerable time to run.
- *
- * @param[in] source       The handle to the source of the media
- * @param[in] infer        The handle to the inference
- * @param[in] roi          Rectangular area including a face in @a source which
- *                         will be analyzed. If NULL, then the whole source will be
- *                         analyzed.
- * @param[in] detected_cb  The callback which will receive the detection results.
- * @param[in] user_data    The user data passed from the code where
- *                         mv_inference_pose_landmark_detect_open() is invoked.
- *                         This data will be accessible in @a detected_cb callback.
- * @return @c 0 on success, otherwise a negative error value
- * @retval #MEDIA_VISION_ERROR_NONE Successful
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
- * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace
- *                                                  isn't supported
- *
- * @pre Create a source handle by calling mv_create_source_open()
- * @pre Create an inference handle by calling mv_inference_create_open()
- * @pre Configure an inference handle by calling mv_inference_configure_open()
- * @pre Prepare an inference by calling mv_inference_prepare_open()
- * @post @a detected_cb will be called to provide detection results
- *
- * @see mv_inference_pose_landmark_detected_cb()
- */
- int mv_inference_pose_landmark_detect_open(
-         mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi,
-         mv_inference_pose_landmark_detected_cb detected_cb,
-         void *user_data);
-
- /**
- * @brief Gets the number of poses.
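- *
- * @details A minimal sketch of walking a result handle delivered to the
- *          landmark-detected callback (the iteration pattern is assumed;
- *          error handling elided):
- * @code
- * int n_poses = 0, n_landmarks = 0;
- * mv_point_s location;
- * float score = 0.f;
- *
- * mv_inference_pose_get_number_of_poses_open(result, &n_poses);
- * mv_inference_pose_get_number_of_landmarks_open(result, &n_landmarks);
- *
- * for (int p = 0; p < n_poses; p++)
- *     for (int k = 0; k < n_landmarks; k++)
- *         mv_inference_pose_get_landmark_open(result, p, k, &location, &score);
- * @endcode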
- *
- * @since_tizen 6.0
- *
- * @param[in] result             The handle to inference result
- * @param[out] number_of_poses   The pointer to the number of poses
- *
- * @return @c 0 on success, otherwise a negative error value
- * @retval #MEDIA_VISION_ERROR_NONE Successful
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
- * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- */
- int mv_inference_pose_get_number_of_poses_open(
-         mv_inference_pose_result_h result, int *number_of_poses);
-
- /**
- * @brief Gets the number of landmarks per pose.
- *
- * @since_tizen 6.0
- *
- * @param[in] result                 The handle to inference result
- * @param[out] number_of_landmarks   The pointer to the number of landmarks
- *
- * @return @c 0 on success, otherwise a negative error value
- * @retval #MEDIA_VISION_ERROR_NONE Successful
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
- * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- */
- int mv_inference_pose_get_number_of_landmarks_open(
-         mv_inference_pose_result_h result, int *number_of_landmarks);
-
- /**
- * @brief Gets landmark location of a part of a pose.
- *
- * @since_tizen 6.0
- *
- * @param[in] result       The handle to inference result
- * @param[in] pose_index   The pose index
- * @param[in] part_index   The index of a part of a pose
- * @param[out] location    The location of a landmark
- * @param[out] score       The score of a landmark
- *
- * @return @c 0 on success, otherwise a negative error value
- * @retval #MEDIA_VISION_ERROR_NONE Successful
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
- * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- *
- */
- int mv_inference_pose_get_landmark_open(
-         mv_inference_pose_result_h result, int pose_index, int part_index, mv_point_s *location, float *score);
-
- /**
- * @brief Gets a label of a pose.
- *
- * @since_tizen 6.0
- *
- * @param[in] result       The handle to inference result
- * @param[in] pose_index   The pose index between 0 and
- *                         the number of poses which can be obtained by
- *                         mv_inference_pose_get_number_of_poses()
- * @param[out] label       The label of a pose
- *
- * @return @c 0 on success, otherwise a negative error value
- * @retval #MEDIA_VISION_ERROR_NONE Successful
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
- * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- *
- * @see mv_inference_pose_get_number_of_poses()
- * @see mv_inference_pose_get_number_of_landmarks()
- * @see mv_inference_pose_landmark_detected_cb()
- * @see mv_inference_pose_result_h
- */
- int mv_inference_pose_get_label_open(
-         mv_inference_pose_result_h result, int pose_index, int *label);
-
- /**
- * @brief Creates pose handle.
- * @details Use this function to create a pose.
- *
- * @since_tizen 6.0
- *
- * @remarks The @a pose should be released using mv_pose_destroy_open().
- *
- * @param[out] pose    The handle to the pose to be created
- *
- * @return @c 0 on success, otherwise a negative error value
- * @retval #MEDIA_VISION_ERROR_NONE Successful
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
- * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory
- *
- * @see mv_pose_destroy_open()
- */
- int mv_pose_create_open(mv_pose_h *pose);
-
- /**
- * @brief Destroys pose handle and releases all its resources.
- *
- * @since_tizen 6.0
- *
- * @param[in] pose    The handle to the pose to be destroyed
- *
- * @return @c 0 on success, otherwise a negative error value
- * @retval #MEDIA_VISION_ERROR_NONE Successful
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
- * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- *
- * @pre Create pose handle by using mv_pose_create_open()
- *
- * @see mv_pose_create_open()
- */
- int mv_pose_destroy_open(mv_pose_h pose);
-
- /**
- * @brief Sets a motion capture file and its pose mapping file to the pose.
- * @details Use this function to set a motion capture file and
- *          its pose mapping file. These are used by mv_pose_compare_open()
- *          to compare a pose detected by mv_inference_pose_landmark_detect_open().
- *
- * @since_tizen 6.0
- * @remarks If the app sets paths to media storage,
- *          then the media storage privilege
- *          %http://tizen.org/privilege/mediastorage is needed.\n
- *          If the app sets the paths to external storage,
- *          then the external storage privilege
- *          %http://tizen.org/privilege/externalstorage is needed.\n
- *          If the required privileges aren't set properly,
- *          mv_pose_set_from_file_open() will return #MEDIA_VISION_ERROR_PERMISSION_DENIED.
- *
- * @param[in] pose                    The handle to the pose
- * @param[in] motionCaptureFilePath   The file path to the motion capture file
- * @param[in] motionMappingFilePath   The file path to the motion mapping file
- *
- * @return @c 0 on success, otherwise a negative error value
- * @retval #MEDIA_VISION_ERROR_NONE Successful
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
- * @retval #MEDIA_VISION_ERROR_PERMISSION_DENIED Permission denied
- * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of file paths
- * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error
- */
- int mv_pose_set_from_file_open(mv_pose_h pose, const char *motionCaptureFilePath, const char *motionMappingFilePath);
-
- /**
- * @brief Compares an action pose with the pose which is set by mv_pose_set_from_file_open().
- * @details Use this function to compare an action pose with the pose
- *          which is set by mv_pose_set_from_file_open().
- *          Parts to be compared can be selected by #mv_inference_human_body_part_e.
- *          Their similarity is given as a score between 0 and 1.
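- *
- *          An end-to-end sketch (paths and the part mask value are
- *          illustrative; @a result stands for a pose result obtained from
- *          mv_inference_pose_landmark_detect_open(); error handling elided):
- * @code
- * mv_pose_h pose = NULL;
- * float score = 0.f;
- *
- * mv_pose_create_open(&pose);
- * mv_pose_set_from_file_open(pose, "/opt/usr/capture.bvh",
- *                            "/opt/usr/mapping.txt");
- * mv_pose_compare_open(pose, result, MV_INFERENCE_HUMAN_BODY_PART_BODY, &score);
- * mv_pose_destroy_open(pose);
- * @endcode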
- *
- * @since_tizen 6.0
- *
- * @param[in] pose     The handle to the pose
- * @param[in] action   The action pose
- * @param[in] parts    The parts to be compared
- * @param[out] score   The similarity score
- *
- * @return @c 0 on success, otherwise a negative error value
- * @retval #MEDIA_VISION_ERROR_NONE Successful
- * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported
- * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter
- * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation
- *
- * @pre Sets the pose by using mv_pose_set_from_file()
- */
- int mv_pose_compare_open(mv_pose_h pose, mv_inference_pose_result_h action, int parts, float *score);
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif /* __MEDIA_VISION_INFERENCE_OPEN_H__ */
diff --git a/mv_inference/inference/src/Bvh.cpp b/mv_inference/inference/src/Bvh.cpp
deleted file mode 100644
index 80d75dfe..00000000
--- a/mv_inference/inference/src/Bvh.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-/**
- * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "Bvh.h"
-#include "BvhUtils.h"
-#include "mv_private.h"
-#include <iostream>
-
-namespace mediavision
-{
-namespace inference
-{
-
-	void Bvh::recalculate_joints_ltm(std::shared_ptr<Joint> start_joint) {
-
-		LOGI("ENTER");
-
-		if (start_joint == NULL)
-		{
-			if (root_joint_ == NULL)
-				return;
-			else
-				start_joint = root_joint_;
-		}
-
-		LOGD("%s joint", start_joint->name().c_str());
-		//LOG(DEBUG) << "recalculate_joints_ltm: " << start_joint->name();
-		cv::Mat offmat_backup = cv::Mat::eye(4,4, CV_32F);
-		offmat_backup.at<float>(0,3) = start_joint->offset().x;
-		offmat_backup.at<float>(1,3) = start_joint->offset().y;
-		offmat_backup.at<float>(2,3) = start_joint->offset().z;
-
-		std::vector<std::vector<float>> data = start_joint->channel_data();
-
-		for (int i = 0; i < num_frames_; i++) {
-			cv::Mat offmat = offmat_backup; // offset matrix
-			cv::Mat rmat = cv::Mat::eye(4,4,CV_32F); // identity matrix set on rotation matrix
-			cv::Mat tmat = cv::Mat::eye(4,4,CV_32F); // identity matrix set on translation matrix
-
-			for (int j = 0; j < start_joint->channels_order().size(); j++) {
-				if (start_joint->channels_order()[j] == Joint::Channel::XPOSITION)
-					tmat.at<float>(0,3) = data[i][j];
-				else if (start_joint->channels_order()[j] == Joint::Channel::YPOSITION)
-					tmat.at<float>(1,3) = data[i][j];
-				else if (start_joint->channels_order()[j] == Joint::Channel::ZPOSITION)
-					tmat.at<float>(2,3) = data[i][j];
-				else if (start_joint->channels_order()[j] == Joint::Channel::XROTATION)
-					rmat = rotate(rmat, data[i][j], Axis::X);
-				else if (start_joint->channels_order()[j] == Joint::Channel::YROTATION)
-					rmat = rotate(rmat, data[i][j], Axis::Y);
-				else if (start_joint->channels_order()[j] == Joint::Channel::ZROTATION)
-					rmat = rotate(rmat, data[i][j], Axis::Z);
-			}
-
-			cv::Mat ltm = cv::Mat::eye(4,4,CV_32F); // local transformation matrix
-
-			if (start_joint->parent() != NULL)
-				ltm = start_joint->parent()->ltm(i) * offmat;
-			else
-				ltm = tmat * offmat;
-
-			cv::Vec3f wPos(ltm.at<float>(0,3), ltm.at<float>(1,3), ltm.at<float>(2,3));
-			start_joint->set_pos(wPos);
-			//LOG(TRACE) << "Joint world position: " << utils::vec3tos(ltm[3]);
-			LOGD("Joint world position: %f, %f, %f", wPos[0], wPos[1], wPos[2]);
-
-			ltm = ltm * rmat;
-
-			//LOG(TRACE) << "Local transformation matrix: \n" << utils::mat4tos(ltm);
-
-			start_joint->set_ltm(ltm, i);
-		} // num frame
-
-		for (auto& child : start_joint->children()) {
-			recalculate_joints_ltm(child);
-		}
-
-		LOGI("LEAVE");
-	} // recalculate_joints_ltm
-
-} // end of bvh
-}
diff --git a/mv_inference/inference/src/BvhParser.cpp b/mv_inference/inference/src/BvhParser.cpp
deleted file mode 100644
index 6205c832..00000000
--- a/mv_inference/inference/src/BvhParser.cpp
+++ /dev/null
@@ -1,397 +0,0 @@
-/**
- * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "BvhParser.h"
-#include "mv_private.h"
-
-#include <fstream>
-#include <iostream>
-#include <sstream>
-#include <string>
-
-/** Indicate whether bvh parser allows multi hierarchy or not
- *  Not fully tested
- */
-#define MULTI_HIERARCHY 0
-
-namespace {
-
-const std::string kChannels = "CHANNELS";
-const std::string kEnd = "End";
-const std::string kEndSite = "End Site";
-const std::string kFrame = "Frame";
-const std::string kFrames = "Frames:";
-const std::string kHierarchy = "HIERARCHY";
-const std::string kJoint = "JOINT";
-const std::string kMotion = "MOTION";
-const std::string kOffset = "OFFSET";
-const std::string kRoot = "ROOT";
-
-const std::string kXpos = "Xposition";
-const std::string kYpos = "Yposition";
-const std::string kZpos = "Zposition";
-const std::string kXrot = "Xrotation";
-const std::string kYrot = "Yrotation";
-const std::string kZrot = "Zrotation";
-
-}
-
-namespace mediavision
-{
-namespace inference
-{
-
-	//##############################################################################
-	// Main parse function
-	//##############################################################################
-	int BvhParser::parse(const std::string& path, Bvh* bvh) {
-		LOGI("ENTER");
-		//LOG(INFO) << "Parsing file : " << path;
-
-		path_ = path;
-		bvh_ = bvh;
-
-		std::ifstream file;
-		file.open(path_);
-
-		if (file.is_open()) {
-			std::string token;
-
-	#if MULTI_HIERARCHY == 1
-			while (file.good()) {
-	#endif
-				file >> token;
-				if (token == kHierarchy) {
-					int ret = parse_hierarchy(file);
-					if (ret)
-						return ret;
-				} else {
-					//LOG(ERROR) << "Bad structure of .bvh file. " << kHierarchy
" << kHierarchy - // << " should be on the top of the file"; - return -1; - } - #if MULTI_HIERARCHY == 1 - } - #endif - } else { - //LOG(ERROR) << "Cannot open file to parse : " << path_; - return -1; - } - - LOGI("LEAVE"); - return 0; - } - - //############################################################################## - // Function parsing hierarchy - //############################################################################## - int BvhParser::parse_hierarchy(std::ifstream& file) { - //LOG(INFO) << "Parsing hierarchy"; - - std::string token; - int ret; - - if (file.good()) { - file >> token; - - //########################################################################## - // Parsing joints - //########################################################################## - if (token == kRoot) { - std::shared_ptr rootJoint; - ret = parse_joint(file, nullptr, rootJoint); - - if (ret) - return ret; - - LOGI("There is %d data channels", bvh_->num_channels()); - - bvh_->set_root_joint(rootJoint); - } else { - LOGE("Bad structure of .bvh file."); - - return -1; - } - } - - if (file.good()) { - file >> token; - - //########################################################################## - // Parsing motion data - //########################################################################## - if (token == kMotion) { - ret = parse_motion(file); - - if (ret) - return ret; - } else { - LOGE("Bad structure of .bvh file."); - - return -1; - } - } - return 0; - } - - //############################################################################## - // Function parsing joint - //############################################################################## - int BvhParser::parse_joint(std::ifstream& file, - std::shared_ptr parent, std::shared_ptr & parsed) { - - //LOG(TRACE) << "Parsing joint"; - - std::shared_ptr joint = std::make_shared(); - joint->set_parent(parent); - - std::string name; - file >> name; - - LOGD("Joint name %s", name.c_str()); - - joint->set_name(name); - - std::string token; - std::vector > children; - int ret; - - file >> token; // Consuming '{' - file >> token; - - //############################################################################ - // Offset parsing - //############################################################################ - if (token == kOffset) { - Joint::Offset offset; - - try { - file >> offset.x >> offset.y >> offset.z; - } catch (const std::ios_base::failure& e) { - //LOG(ERROR) << "Failure while parsing offset"; - return -1; - } - - joint->set_offset(offset); - - //LOG(TRACE) << "Offset x: " << offset.x << ", y: " << offset.y << ", z: " - // << offset.z; - - } else { - //LOG(ERROR) << "Bad structure of .bvh file. Expected " << kOffset << ", but " - // << "found \"" << token << "\""; - - return -1; - } - - file >> token; - - //############################################################################ - // Channels parsing - //############################################################################ - if (token == kChannels) { - ret = parse_channel_order(file, joint); - - //LOG(TRACE) << "Joint has " << joint->num_channels() << " data channels"; - - if (ret) - return ret; - } else { - //LOG(ERROR) << "Bad structure of .bvh file. 
Expected " << kChannels - // << ", but found \"" << token << "\""; - - return -1; - } - - file >> token; - - bvh_->add_joint(joint); - - //############################################################################ - // Children parsing - //############################################################################ - - while (file.good()) { - //########################################################################## - // Child joint parsing - //########################################################################## - if (token == kJoint) { - std::shared_ptr child; - ret = parse_joint(file, joint, child); - - if (ret) - return ret; - - children.push_back(child); - - //########################################################################## - // Child joint parsing - //########################################################################## - } else if (token == kEnd) { - file >> token >> token; // Consuming "Site {" - - std::shared_ptr tmp_joint = std::make_shared (); - - tmp_joint->set_parent(joint); - tmp_joint->set_name(kEndSite); - children.push_back(tmp_joint); - - file >> token; - - //######################################################################## - // End site offset parsing - //######################################################################## - if (token == kOffset) { - Joint::Offset offset; - - try { - file >> offset.x >> offset.y >> offset.z; - } catch (const std::ios_base::failure& e) { - //LOG(ERROR) << "Failure while parsing offset"; - return -1; - } - - tmp_joint->set_offset(offset); - - // LOG(TRACE) << "Joint name : EndSite"; - // LOG(TRACE) << "Offset x: " << offset.x << ", y: " << offset.y << ", z: " - // << offset.z; - - file >> token; // Consuming "}" - - } else { - //LOG(ERROR) << "Bad structure of .bvh file. Expected " << kOffset - // << ", but found \"" << token << "\""; - - return -1; - } - - bvh_->add_joint(tmp_joint); - //########################################################################## - // End joint parsing - //########################################################################## - } else if (token == "}") { - joint->set_children(children); - parsed = joint; - return 0; - } - - file >> token; - } - - //LOG(ERROR) << "Cannot parse joint, unexpected end of file. 
Last token : " - // << token; - return -1; - } - - //############################################################################## - // Motion data parse function - //############################################################################## - int BvhParser::parse_motion(std::ifstream& file) { - - LOGI("ENTER"); - - std::string token; - file >> token; - - int frames_num; - - if (token == kFrames) { - file >> frames_num; - bvh_->set_num_frames(frames_num); - LOGD("Num of frames: %d", frames_num); - } else { - LOGE("Bad structure of .bvh file"); - - return -1; - } - - file >> token; - - double frame_time; - - if (token == kFrame) { - file >> token; // Consuming 'Time:' - file >> frame_time; - bvh_->set_frame_time(frame_time); - LOGD("Frame time: %f",frame_time); - - float number; - for (int i = 0; i < frames_num; i++) { - for (auto joint : bvh_->joints()) { - std::vector data; - for (int j = 0; j < joint->num_channels(); j++) { - file >> number; - data.push_back(number); - } - LOGD("%s joint", joint->name().c_str()); - joint->add_frame_motion_data(data); - } - } - } else { - LOGE("Bad structure of .bvh file."); - return -1; - } - - LOGI("LEAVE"); - - return 0; - } - - //############################################################################## - // Channels order parse function - //############################################################################## - int BvhParser::parse_channel_order(std::ifstream& file, - std::shared_ptr joint) { - - LOGI("ENTER"); - - int num; - file >> num; - LOGD("Number of channels: %d",num); - - std::vector channels; - std::string token; - - for (int i = 0; i < num; i++) { - file >> token; - if (token == kXpos) - channels.push_back(Joint::Channel::XPOSITION); - else if (token == kYpos) - channels.push_back(Joint::Channel::YPOSITION); - else if (token == kZpos) - channels.push_back(Joint::Channel::ZPOSITION); - else if (token == kXrot) - channels.push_back(Joint::Channel::XROTATION); - else if (token == kYrot) - channels.push_back(Joint::Channel::YROTATION); - else if (token == kZrot) - channels.push_back(Joint::Channel::ZROTATION); - else { - //LOG(ERROR) << "Not valid channel!"; - return -1; - } - } - - joint->set_channels_order(channels); - - LOGI("LEAVE"); - - return 0; - } - -} -} // namespace diff --git a/mv_inference/inference/src/BvhUtils.cpp b/mv_inference/inference/src/BvhUtils.cpp deleted file mode 100644 index ba11a910..00000000 --- a/mv_inference/inference/src/BvhUtils.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-#include "BvhUtils.h"
-#include <cmath>
-
-#define DegreeToRadian(degree) ((degree) * (M_PI/180.f))
-
-namespace mediavision
-{
-namespace inference
-{
-	cv::Mat rotation_matrix(float angle, Axis axis) {
-		cv::Mat matrix = cv::Mat::eye(4,4,CV_32F);
-
-		float rangle = DegreeToRadian(angle);
-
-		// We want to avoid the situation when the matrix contains -0.0f
-		// values, so we perform additional checking
-		float sin_a = sin(rangle);
-		if (fabs(sin_a) < std::numeric_limits<float>::epsilon())
-			sin_a = 0.0f;
-		float cos_a = cos(rangle);
-		if (fabs(cos_a) < std::numeric_limits<float>::epsilon())
-			cos_a = 0.0f;
-		float msin_a = fabs(sin_a) < std::numeric_limits<float>::epsilon() ?
-				0.0f : (-1.0f) * sin_a;
-
-		if (axis == Axis::X) {
-			matrix.at<float>(1,1) = cos_a;
-			matrix.at<float>(2,1) = sin_a;
-			matrix.at<float>(1,2) = msin_a;
-			matrix.at<float>(2,2) = cos_a;
-		} else if (axis == Axis::Y) {
-			matrix.at<float>(0,0) = cos_a;
-			matrix.at<float>(2,0) = msin_a;
-			matrix.at<float>(0,2) = sin_a;
-			matrix.at<float>(2,2) = cos_a;
-		} else {
-			matrix.at<float>(0,0) = cos_a;
-			matrix.at<float>(1,0) = sin_a;
-			matrix.at<float>(0,1) = msin_a;
-			matrix.at<float>(1,1) = cos_a;
-		}
-
-		return matrix;
-	}
-
-	/** Rotates matrix
-	 *  @param matrix The matrix to be rotated
-	 *  @param angle The rotation angle
-	 *  @param axis The rotation axis
-	 *  @return The rotation matrix
-	 */
-	cv::Mat rotate(cv::Mat matrix, float angle, Axis axis) {
-		return matrix * rotation_matrix(angle, axis);
-	}
-}
-}
\ No newline at end of file
diff --git a/mv_inference/inference/src/Inference.cpp b/mv_inference/inference/src/Inference.cpp
deleted file mode 100755
index aab4b815..00000000
--- a/mv_inference/inference/src/Inference.cpp
+++ /dev/null
@@ -1,1756 +0,0 @@
-/**
- * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "mv_private.h" -#include "Inference.h" -#include "InferenceIni.h" -#include "ObjectDecoder.h" -#include - -#include -#include -#include -#include -#include - -#define MV_INFERENCE_OUTPUT_NUMBERS_MAX 10 -#define MV_INFERENCE_OUTPUT_NUMBERS_MIN 1 -#define MV_INFERENCE_CONFIDENCE_THRESHOLD_MAX 1.0 -#define MV_INFERENCE_CONFIDENCE_THRESHOLD_MIN 0.0 - -typedef enum { - InputAttrNoType = 0, - InputAttrFloat32 = 1, - InputAttrInt32 = 2, - InputAttrUInt8 = 3, - InputAttrInt64 = 4, - InputAttrString = 5, - InputAttrBool = 6, -} InputAttrType; - -namespace mediavision -{ -namespace inference -{ - InferenceConfig::InferenceConfig() : - mConfigFilePath(), - mWeightFilePath(), - mUserFilePath(), - mDataType(MV_INFERENCE_DATA_FLOAT32), - mBackedType(MV_INFERENCE_BACKEND_NONE), - mTargetTypes(MV_INFERENCE_TARGET_DEVICE_CPU), - mConfidenceThresHold(), - mMeanValue(), - mStdValue(), - mMaxOutputNumbers(1) - { - mTensorInfo.width = -1; - mTensorInfo.height = -1; - mTensorInfo.dim = -1; - mTensorInfo.ch = -1; - } - - Inference::Inference() : - mCanRun(), - mConfig(), - mBackendCapacity(), - mSupportedInferenceBackend(), - mInputSize(cv::Size()), - mCh(), - mDim(), - mDeviation(), - mMean(), - mThreshold(), - mOutputNumbers(), - mSourceSize(cv::Size()), - mInputBuffer(cv::Mat()), - engine_config(), - mBackend(), - mPoseResult(NULL), - mMetadata(), - mPreProc(), - mPostProc() - { - LOGI("ENTER"); - - mSupportedInferenceBackend.insert(std::make_pair( - MV_INFERENCE_BACKEND_OPENCV, std::make_pair("opencv", false))); - mSupportedInferenceBackend.insert(std::make_pair( - MV_INFERENCE_BACKEND_TFLITE, std::make_pair("tflite", false))); - mSupportedInferenceBackend.insert(std::make_pair( - MV_INFERENCE_BACKEND_ARMNN, std::make_pair("armnn", false))); - mSupportedInferenceBackend.insert(std::make_pair( - MV_INFERENCE_BACKEND_MLAPI, std::make_pair("mlapi", false))); - mSupportedInferenceBackend.insert(std::make_pair( - MV_INFERENCE_BACKEND_ONE, std::make_pair("mlapi", false))); - - CheckSupportedInferenceBackend(); - - for (auto& backend : mSupportedInferenceBackend) { - LOGI("%s: %s", backend.second.first.c_str(), - backend.second.second ? "TRUE" : "FALSE"); - } - - mModelFormats.insert(std::make_pair( - "caffemodel", INFERENCE_MODEL_CAFFE)); - mModelFormats.insert( - std::make_pair("pb", INFERENCE_MODEL_TF)); - mModelFormats.insert(std::make_pair( - "tflite", INFERENCE_MODEL_TFLITE)); - mModelFormats.insert( - std::make_pair("t7", INFERENCE_MODEL_TORCH)); - mModelFormats.insert(std::make_pair( - "weights", INFERENCE_MODEL_DARKNET)); - mModelFormats.insert( - std::make_pair("bin", INFERENCE_MODEL_DLDT)); - mModelFormats.insert( - std::make_pair("onnx", INFERENCE_MODEL_ONNX)); - mModelFormats.insert(std::make_pair( - "nb", INFERENCE_MODEL_VIVANTE)); - - LOGI("LEAVE"); - } - - Inference::~Inference() - { - CleanupTensorBuffers(); - - if (!mInputLayerProperty.layers.empty()) { - mInputLayerProperty.layers.clear(); - std::map().swap( - mInputLayerProperty.layers); - } - if (!mOutputLayerProperty.layers.empty()) { - mOutputLayerProperty.layers.clear(); - std::map().swap( - mOutputLayerProperty.layers); - } - - if (mPoseResult) { - for (int poseIndex = 0; poseIndex < mPoseResult->number_of_poses; ++poseIndex) { - delete [] mPoseResult->landmarks[poseIndex]; - } - delete [] mPoseResult->landmarks; - delete mPoseResult; - } - - mModelFormats.clear(); - - // Release backend engine. 
- if (mBackend) { - mBackend->UnbindBackend(); - delete mBackend; - } - - LOGI("Released backend engine."); - } - - void Inference::CheckSupportedInferenceBackend() - { - LOGI("ENTER"); - - InferenceInI ini; - ini.LoadInI(); - - std::vector supportedBackend = ini.GetSupportedInferenceEngines(); - for (auto& backend : supportedBackend) { - LOGI("engine: %d", backend); - - mSupportedInferenceBackend[backend].second = true; - } - - LOGI("LEAVE"); - } - - int Inference::ConvertEngineErrorToVisionError(int error) - { - int ret = MEDIA_VISION_ERROR_NONE; - - switch (error) { - case INFERENCE_ENGINE_ERROR_NONE: - ret = MEDIA_VISION_ERROR_NONE; - break; - case INFERENCE_ENGINE_ERROR_NOT_SUPPORTED: - ret = MEDIA_VISION_ERROR_NOT_SUPPORTED; - break; - case INFERENCE_ENGINE_ERROR_MSG_TOO_LONG: - ret = MEDIA_VISION_ERROR_MSG_TOO_LONG; - break; - case INFERENCE_ENGINE_ERROR_NO_DATA: - ret = MEDIA_VISION_ERROR_NO_DATA; - break; - case INFERENCE_ENGINE_ERROR_KEY_NOT_AVAILABLE: - ret = MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE; - break; - case INFERENCE_ENGINE_ERROR_OUT_OF_MEMORY: - ret = MEDIA_VISION_ERROR_OUT_OF_MEMORY; - break; - case INFERENCE_ENGINE_ERROR_INVALID_PARAMETER: - ret = MEDIA_VISION_ERROR_INVALID_PARAMETER; - break; - case INFERENCE_ENGINE_ERROR_INVALID_OPERATION: - ret = MEDIA_VISION_ERROR_INVALID_OPERATION; - break; - case INFERENCE_ENGINE_ERROR_PERMISSION_DENIED: - ret = MEDIA_VISION_ERROR_PERMISSION_DENIED; - break; - case INFERENCE_ENGINE_ERROR_NOT_SUPPORTED_FORMAT: - ret = MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT; - break; - case INFERENCE_ENGINE_ERROR_INTERNAL: - ret = MEDIA_VISION_ERROR_INTERNAL; - break; - case INFERENCE_ENGINE_ERROR_INVALID_DATA: - ret = MEDIA_VISION_ERROR_INVALID_DATA; - break; - case INFERENCE_ENGINE_ERROR_INVALID_PATH: - ret = MEDIA_VISION_ERROR_INVALID_PATH; - break; - default: - LOGE("Unknown inference engine error type"); - } - - return ret; - } - - int Inference::ConvertTargetTypes(int given_types) - { - int target_types = INFERENCE_TARGET_NONE; - - if (given_types & MV_INFERENCE_TARGET_DEVICE_CPU) - target_types |= INFERENCE_TARGET_CPU; - if (given_types & MV_INFERENCE_TARGET_DEVICE_GPU) - target_types |= INFERENCE_TARGET_GPU; - if (given_types & MV_INFERENCE_TARGET_DEVICE_CUSTOM) - target_types |= INFERENCE_TARGET_CUSTOM; - - return target_types; - } - - int Inference::ConvertToCv(int given_type) - { - int type = 0; - - switch (given_type) { - case INFERENCE_TENSOR_DATA_TYPE_UINT8: - LOGI("Type is %d ch with UINT8", mCh); - type = mCh == 1 ? CV_8UC1 : CV_8UC3; - break; - case INFERENCE_TENSOR_DATA_TYPE_FLOAT32: - LOGI("Type is %d ch with FLOAT32", mCh); - type = mCh == 1 ? CV_32FC1 : CV_32FC3; - break; - default: - LOGI("unknown data type so FLOAT32 data type will be used in default"); - type = mCh == 1 ? 
CV_32FC1 : CV_32FC3; - break; - } - - return type; - } - - inference_tensor_data_type_e Inference::ConvertToIE(int given_type) - { - inference_tensor_data_type_e type = INFERENCE_TENSOR_DATA_TYPE_FLOAT32; - - switch (given_type) { - case MV_INFERENCE_DATA_FLOAT32: - type = INFERENCE_TENSOR_DATA_TYPE_FLOAT32; - break; - case MV_INFERENCE_DATA_UINT8: - type = INFERENCE_TENSOR_DATA_TYPE_UINT8; - break; - default: - LOGI("unknown data type so FLOAT32 data type will be used in default"); - break; - } - - return type; - } - - int Inference::Preprocess(cv::Mat cvImg, cv::Mat cvDst, int data_type) - { - mSourceSize = cvImg.size(); - int width = mInputSize.width; - int height = mInputSize.height; - - cv::Mat sample; - if (cvImg.channels() == 3 && mCh == 1) - cv::cvtColor(cvImg, sample, cv::COLOR_BGR2GRAY); - else - sample = cvImg; - - // size - cv::Mat sampleResized; - if (sample.size() != cv::Size(width, height)) - cv::resize(sample, sampleResized, cv::Size(width, height)); - else - sampleResized = sample; - - // type - cv::Mat sampleFloat; - if (mCh == 3) - sampleResized.convertTo(sampleFloat, CV_32FC3); - else - sampleResized.convertTo(sampleFloat, CV_32FC1); - - // normalize - cv::Mat sampleNormalized; - cv::Mat meanMat; - if (mCh == 3) - meanMat = cv::Mat(sampleFloat.size(), CV_32FC3, - cv::Scalar((float) mMean, (float) mMean, - (float) mMean)); - else - meanMat = cv::Mat(sampleFloat.size(), CV_32FC1, - cv::Scalar((float) mMean)); - - cv::subtract(sampleFloat, meanMat, sampleNormalized); - - sampleNormalized /= static_cast(mDeviation); - - sampleNormalized.convertTo(cvDst, data_type); - - return MEDIA_VISION_ERROR_NONE; - } - - int Inference::SetUserFile(std::string filename) - { - std::ifstream fp(filename.c_str()); - if (!fp.is_open()) { - return MEDIA_VISION_ERROR_INVALID_PATH; - } - - std::string userListName; - while (!fp.eof()) { - std::getline(fp, userListName); - if (userListName.length()) - mUserListName.push_back(userListName); - } - - fp.close(); - - return MEDIA_VISION_ERROR_NONE; - } - - void Inference::ConfigureModelFiles(const std::string modelConfigFilePath, - const std::string modelWeightFilePath, - const std::string modelUserFilePath) - { - LOGI("ENTER"); - - mConfig.mConfigFilePath = modelConfigFilePath; - mConfig.mWeightFilePath = modelWeightFilePath; - mConfig.mUserFilePath = modelUserFilePath; - - LOGI("LEAVE"); - } - - void Inference::ConfigureTensorInfo(int width, int height, int dim, int ch, - double stdValue, double meanValue) - { - LOGI("ENTER"); - - mConfig.mTensorInfo = { width, height, dim, ch }; - mConfig.mStdValue = stdValue; - mConfig.mMeanValue = meanValue; - - LOGI("LEAVE"); - } - - void Inference::ConfigureInputInfo(int width, int height, int dim, int ch, - double stdValue, double meanValue, - int dataType, - const std::vector names) - { - LOGI("ENTER"); - - // FIXME: mConfig should be removed - mConfig.mTensorInfo = { width, height, dim, ch }; - mConfig.mStdValue = stdValue; - mConfig.mMeanValue = meanValue; - mConfig.mDataType = static_cast(dataType); - mConfig.mInputLayerNames = names; - - const InputMetadata& inputMeta = mMetadata.GetInputMeta(); - if (inputMeta.parsed) { - LOGI("use input meta"); - auto& layerInfo = inputMeta.layer.begin()->second; - if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { // NCHW - mConfig.mTensorInfo.ch = layerInfo.dims[1]; - mConfig.mTensorInfo.dim = layerInfo.dims[0]; - mConfig.mTensorInfo.width = layerInfo.dims[3]; - mConfig.mTensorInfo.height = layerInfo.dims[2]; - } else if (layerInfo.shapeType == 
INFERENCE_TENSOR_SHAPE_NHWC) {// NHWC - mConfig.mTensorInfo.ch = layerInfo.dims[3]; - mConfig.mTensorInfo.dim = layerInfo.dims[0]; - mConfig.mTensorInfo.width = layerInfo.dims[2]; - mConfig.mTensorInfo.height = layerInfo.dims[1]; - } else { - LOGE("Invalid shape type[%d]", layerInfo.shapeType); - } - - if (!inputMeta.option.empty()) { - auto& option = inputMeta.option.begin()->second; - if (option.normalization.use) { - mConfig.mMeanValue = option.normalization.mean[0]; - mConfig.mStdValue = option.normalization.std[0]; - } - } - - mConfig.mDataType = layerInfo.dataType; - mConfig.mInputLayerNames.clear(); - for (auto& layer : inputMeta.layer) { - mConfig.mInputLayerNames.push_back(layer.first); - } - } - - inference_engine_layer_property property; - // In case of that a inference plugin deosn't support to get properties, - // the tensor info given by a user will be used. - // If the plugin supports that, the given info will be ignored. - - for (auto& name : mConfig.mInputLayerNames) { - inference_engine_tensor_info tensor_info; - tensor_info.data_type = ConvertToIE(dataType); - - // In case of OpenCV, only supports NCHW - tensor_info.shape_type = INFERENCE_TENSOR_SHAPE_NCHW; - // modify to handle multiple tensor infos - tensor_info.shape.push_back(mConfig.mTensorInfo.dim); - tensor_info.shape.push_back(mConfig.mTensorInfo.ch); - tensor_info.shape.push_back(mConfig.mTensorInfo.height); - tensor_info.shape.push_back(mConfig.mTensorInfo.width); - - tensor_info.size = 1; - for (auto& dim : tensor_info.shape) { - tensor_info.size *= dim; - } - - property.layers.insert(std::make_pair(name, tensor_info)); - } - - int ret = mBackend->SetInputLayerProperty(property); - if (ret != INFERENCE_ENGINE_ERROR_NONE) { - LOGE("Fail to set input layer property"); - } - - LOGI("LEAVE"); - } - - void Inference::ConfigureOutputInfo(const std::vector names) - { - LOGI("ENTER"); - - mConfig.mOutputLayerNames = names; - - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { - mConfig.mOutputLayerNames.clear(); - if (!outputMeta.GetScore().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetScore().GetName()); - - if (!outputMeta.GetBox().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetBox().GetName()); - - if (!outputMeta.GetLabel().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetLabel().GetName()); - - if (!outputMeta.GetNumber().GetName().empty()) - mConfig.mOutputLayerNames.push_back(outputMeta.GetNumber().GetName()); - } - - inference_engine_layer_property property; - - inference_engine_tensor_info tensor_info = { std::vector{1}, - INFERENCE_TENSOR_SHAPE_NCHW, - INFERENCE_TENSOR_DATA_TYPE_FLOAT32, - 1}; - for (auto& name : mConfig.mOutputLayerNames) { - property.layers.insert(std::make_pair(name, tensor_info)); - } - - int ret = mBackend->SetOutputLayerProperty(property); - if (ret != INFERENCE_ENGINE_ERROR_NONE) { - LOGE("Fail to set output layer property"); - } - - LOGI("LEAVE"); - } - - int Inference::ConfigureBackendType( - const mv_inference_backend_type_e backendType) - { - // Check if a given backend type is valid or not. 
- if (backendType <= MV_INFERENCE_BACKEND_NONE || - backendType >= MV_INFERENCE_BACKEND_MAX) { - LOGE("Invalid backend type."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - std::pair backend = - mSupportedInferenceBackend[backendType]; - if (backend.second == false) { - LOGE("%s type is not supported", (backend.first).c_str()); - return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT; - } - - LOGI("backend engine : %d", backendType); - - mConfig.mBackedType = backendType; - - return MEDIA_VISION_ERROR_NONE; - } - - int Inference::ConfigureTargetTypes(const int targetType) - { - // Check if given target types are valid or not. - if (MV_INFERENCE_TARGET_NONE >= targetType || - MV_INFERENCE_TARGET_MAX <= targetType) { - LOGE("Invalid target device."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - LOGI("Before converting target types : %d", targetType); - - unsigned int new_type = MV_INFERENCE_TARGET_DEVICE_NONE; - - // Convert old type to new one. - switch (targetType) { - case MV_INFERENCE_TARGET_CPU: - if (mBackendCapacity.supported_accel_devices != INFERENCE_TARGET_CPU) { - LOGE("Backend doesn't support CPU acceleration."); - return MEDIA_VISION_ERROR_NOT_SUPPORTED; - } - new_type = MV_INFERENCE_TARGET_DEVICE_CPU; - break; - case MV_INFERENCE_TARGET_GPU: - if (mBackendCapacity.supported_accel_devices != INFERENCE_TARGET_GPU) { - LOGE("Backend doesn't support GPU acceleration."); - return MEDIA_VISION_ERROR_NOT_SUPPORTED; - } - new_type = MV_INFERENCE_TARGET_DEVICE_GPU; - break; - case MV_INFERENCE_TARGET_CUSTOM: - if (mBackendCapacity.supported_accel_devices != INFERENCE_TARGET_CUSTOM) { - LOGE("Backend doesn't support custom device acceleration."); - return MEDIA_VISION_ERROR_NOT_SUPPORTED; - } - new_type = MV_INFERENCE_TARGET_DEVICE_CUSTOM; - break; - } - - LOGI("After converting target types : %d", new_type); - - mConfig.mTargetTypes = new_type; - - return MEDIA_VISION_ERROR_NONE; - } - - int Inference::ConfigureTargetDevices(const int targetDevices) - { - // Check if given target types are valid or not. 
- if (MV_INFERENCE_TARGET_DEVICE_NONE >= targetDevices || - MV_INFERENCE_TARGET_DEVICE_MAX <= targetDevices) { - LOGE("Invalid target device."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - LOGI("target devices : %d", targetDevices); - - if (!(mBackendCapacity.supported_accel_devices & targetDevices)) { - LOGE("Backend doesn't support a given device acceleration."); - return MEDIA_VISION_ERROR_NOT_SUPPORTED; - } - - mConfig.mTargetTypes = targetDevices; - - return MEDIA_VISION_ERROR_NONE; - } - - void Inference::ConfigureOutput(const int maxOutputNumbers) - { - mConfig.mMaxOutputNumbers = std::max( - std::min(maxOutputNumbers, MV_INFERENCE_OUTPUT_NUMBERS_MAX), - MV_INFERENCE_OUTPUT_NUMBERS_MIN); - } - - void Inference::ConfigureThreshold(const double threshold) - { - mConfig.mConfidenceThresHold = std::max( - std::min(threshold, MV_INFERENCE_CONFIDENCE_THRESHOLD_MAX), - MV_INFERENCE_CONFIDENCE_THRESHOLD_MIN); - } - - int Inference::ParseMetadata(const std::string filePath) - { - LOGI("ENTER"); - int ret = mMetadata.Init(filePath); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to init metadata[%d]", ret); - return ret; - } - - ret = mMetadata.Parse(); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to parse metadata[%d]", ret); - return ret; - } - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - void Inference::CleanupTensorBuffers(void) - { - LOGI("ENTER"); - - if (!mInputTensorBuffers.empty()) { - mInputTensorBuffers.clear(); - } - - if (!mOutputTensorBuffers.empty()) { - mOutputTensorBuffers.clear(); - } - - LOGI("LEAVE"); - } - - int Inference::PrepareTenosrBuffers(void) - { - // If there are input and output tensor buffers allocated before then release the buffers. - // They will be allocated again according to a new model file to be loaded. - CleanupTensorBuffers(); - - // IF model file is loaded again then the model type could be different so - // clean up input and output layer properties so that they can be updated again - // after reloading the model file. - if (!mInputLayerProperty.layers.empty()) { - mInputLayerProperty.layers.clear(); - std::map().swap( - mInputLayerProperty.layers); - } - if (!mOutputLayerProperty.layers.empty()) { - mOutputLayerProperty.layers.clear(); - std::map().swap( - mOutputLayerProperty.layers); - } - - // Get input tensor buffers from a backend engine if the backend engine allocated. - auto& inputTensorBuffers = mInputTensorBuffers.getAllTensorBuffer(); - int ret = mBackend->GetInputTensorBuffers(inputTensorBuffers); - if (ret != INFERENCE_ENGINE_ERROR_NONE) { - LOGE("Fail to get input tensor buffers from backend engine."); - return ConvertEngineErrorToVisionError(ret); - } - - ret = mBackend->GetInputLayerProperty(mInputLayerProperty); - if (ret != INFERENCE_ENGINE_ERROR_NONE) { - LOGE("Fail to get input layer property from backend engine."); - return ConvertEngineErrorToVisionError(ret); - } - - // If the backend engine isn't able to allocate input tensor buffers internally, - // then allocate the buffers at here. 
-	if (mInputTensorBuffers.empty()) {
-		for (auto& layer : mInputLayerProperty.layers) {
-			const inference_engine_tensor_info& tensor_info = layer.second;
-			inference_engine_tensor_buffer tensor_buffer;
-
-			if (tensor_info.data_type == INFERENCE_TENSOR_DATA_TYPE_FLOAT32) {
-				tensor_buffer.buffer = new float[tensor_info.size];
-				tensor_buffer.size = tensor_info.size;
-			} else if (tensor_info.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT8) {
-				tensor_buffer.buffer = new unsigned char[tensor_info.size];
-				tensor_buffer.size = tensor_info.size;
-			} else if (tensor_info.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT16) {
-				tensor_buffer.buffer = new unsigned short[tensor_info.size];
-				tensor_buffer.size = tensor_info.size;
-			} else {
-				LOGE("Invalid input tensor data type.");
-				return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-			}
-
-			if (tensor_buffer.buffer == NULL) {
-				LOGE("Fail to allocate input tensor buffer.");
-				return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
-			}
-
-			LOGI("Allocated input tensor buffer(size = %zu, data type = %d)",
-					tensor_info.size, tensor_info.data_type);
-
-			tensor_buffer.owner_is_backend = 0;
-			tensor_buffer.data_type = tensor_info.data_type;
-			mInputTensorBuffers.setTensorBuffer(layer.first, tensor_buffer);
-		}
-	}
-
-	LOGI("Input tensor buffer count is %zu", mInputTensorBuffers.size());
-
-	// Get output tensor buffers from the backend engine if it allocated them.
-	auto& outputTensorBuffers = mOutputTensorBuffers.getAllTensorBuffer();
-	ret = mBackend->GetOutputTensorBuffers(outputTensorBuffers);
-	if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-		LOGE("Fail to get output tensor buffers from backend engine.");
-		return ConvertEngineErrorToVisionError(ret);
-	}
-
-	ret = mBackend->GetOutputLayerProperty(mOutputLayerProperty);
-	if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-		LOGE("Fail to get output layer property from backend engine.");
-		return ConvertEngineErrorToVisionError(ret);
-	}
-
-	// If the backend engine isn't able to allocate output tensor buffers internally,
-	// then allocate the buffers here.
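The allocation ladder above picks one C++ element type per tensor data type. One way to see the underlying arithmetic is a helper that derives the byte count from an element-size table; the enum and sizes below are illustrative, not the inference-engine definitions:

#include <cstddef>
#include <cstdio>
#include <vector>

enum class DataType { Float32, UInt8, UInt16 };  // illustrative subset

std::size_t ElementSize(DataType t)
{
    switch (t) {
    case DataType::Float32: return 4;
    case DataType::UInt8:   return 1;
    case DataType::UInt16:  return 2;
    }
    return 0;
}

int main()
{
    // A tensor of 224*224*3 elements, as a classification input might be.
    std::size_t elems = 224 * 224 * 3;
    DataType type = DataType::Float32;

    // Owning one raw byte buffer avoids a separate new[] branch per type.
    std::vector<unsigned char> buffer(elems * ElementSize(type));
    std::printf("allocated %zu bytes\n", buffer.size());
    return 0;
}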
- if (mOutputTensorBuffers.empty()) { - for (auto& layer : mOutputLayerProperty.layers) { - const inference_engine_tensor_info& tensor_info = layer.second; - inference_engine_tensor_buffer tensor_buffer; - if (tensor_info.data_type == - INFERENCE_TENSOR_DATA_TYPE_FLOAT32) { - tensor_buffer.buffer = new float[tensor_info.size]; - tensor_buffer.size = tensor_info.size; - } else if (tensor_info.data_type == - INFERENCE_TENSOR_DATA_TYPE_INT64) { - tensor_buffer.buffer = new long long[tensor_info.size]; - tensor_buffer.size = tensor_info.size; - } else if (tensor_info.data_type == - INFERENCE_TENSOR_DATA_TYPE_UINT32) { - tensor_buffer.buffer = new unsigned int[tensor_info.size]; - tensor_buffer.size = tensor_info.size; - } else if (tensor_info.data_type == - INFERENCE_TENSOR_DATA_TYPE_UINT8) { - tensor_buffer.buffer = new char[tensor_info.size]; - tensor_buffer.size = tensor_info.size; - } else if (tensor_info.data_type == - INFERENCE_TENSOR_DATA_TYPE_UINT16) { - tensor_buffer.buffer = new unsigned short[tensor_info.size]; - tensor_buffer.size = tensor_info.size; - } else { - LOGE("Invalid output tensor data type."); - CleanupTensorBuffers(); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - if (tensor_buffer.buffer == NULL) { - LOGE("Fail to allocate output tensor buffer."); - CleanupTensorBuffers(); - return MEDIA_VISION_ERROR_OUT_OF_MEMORY; - } - - LOGI("Allocated output tensor buffer(size = %zu, data type = %d)", - tensor_info.size, tensor_info.data_type); - - tensor_buffer.owner_is_backend = 0; - tensor_buffer.data_type = tensor_info.data_type; - mOutputTensorBuffers.setTensorBuffer(layer.first, tensor_buffer); - } - } - - LOGI("Output tensor buffer count is %zu", mOutputTensorBuffers.size()); - - return MEDIA_VISION_ERROR_NONE; - } - - int Inference::FillOutputResult(tensor_t &outputData) - { - for (auto& layer : mOutputLayerProperty.layers) { - const inference_engine_tensor_info& tensor_info = layer.second; - - std::vector tmpDimInfo; - for (auto& dim : tensor_info.shape) { - tmpDimInfo.push_back(dim); - } - - outputData.dimInfo.push_back(tmpDimInfo); - - inference_engine_tensor_buffer* tensorBuffers = - mOutputTensorBuffers.getTensorBuffer(layer.first); - if (tensorBuffers == NULL) { - LOGE("Fail to getTensorBuffer with name %s", layer.first.c_str()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - // Normalize output tensor data converting it to float type in case of quantized model. - if (tensor_info.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT8) { - float *new_buf = new float[tensor_info.size]; - if (new_buf == NULL) { - LOGE("Fail to allocate a new output tensor buffer."); - return MEDIA_VISION_ERROR_OUT_OF_MEMORY; - } - - auto *ori_buf = static_cast( - tensorBuffers->buffer); - - for (int j = 0; j < tensor_info.size; j++) { - new_buf[j] = static_cast(ori_buf[j]) / 255.0f; - } - - // replace original buffer with new one, and release origin one. - tensorBuffers->buffer = new_buf; - - if (!tensorBuffers->owner_is_backend) - delete[] ori_buf; - } - - if (tensor_info.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT16) { - float *new_buf = new float[tensor_info.size]; - if (new_buf == NULL) { - LOGE("Fail to allocate a new output tensor buffer."); - return MEDIA_VISION_ERROR_OUT_OF_MEMORY; - } - - auto *ori_buf = - static_cast(tensorBuffers->buffer); - - for (int j = 0; j < tensor_info.size; j++) { - new_buf[j] = static_cast(ori_buf[j]); - } - - // replace original buffer with new one, and release origin one. 
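Every buffer allocated above is tagged with owner_is_backend = 0 so cleanup code can tell whether this layer or the backend must free it. That ownership flag can also be pushed onto the type system; the following is only a sketch of an RAII-style alternative, not how the mediavision code is structured:

#include <cstddef>
#include <cstdio>
#include <memory>

// Frees the buffer only when this side owns it; a backend-owned pointer
// is wrapped with a no-op deleter instead of a flag check at cleanup.
using TensorBuffer = std::unique_ptr<unsigned char[], void (*)(unsigned char *)>;

TensorBuffer MakeOwned(std::size_t bytes)
{
    return TensorBuffer(new unsigned char[bytes],
                        [](unsigned char *p) { delete[] p; });
}

TensorBuffer WrapBackendOwned(unsigned char *backendPtr)
{
    return TensorBuffer(backendPtr, [](unsigned char *) { /* backend frees it */ });
}

int main()
{
    TensorBuffer owned = MakeOwned(1024);                  // freed automatically
    static unsigned char backendMem[64];
    TensorBuffer borrowed = WrapBackendOwned(backendMem);  // never freed here
    std::printf("buffers: %p %p\n", (void *) owned.get(), (void *) borrowed.get());
    return 0;
}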
- tensorBuffers->buffer = new_buf; - - if (!tensorBuffers->owner_is_backend) - delete[] ori_buf; - } - - outputData.data.push_back(static_cast(tensorBuffers->buffer)); - } - - return MEDIA_VISION_ERROR_NONE; - } - - int Inference::Bind(void) - { - LOGI("ENTER"); - - if (mConfig.mBackedType <= MV_INFERENCE_BACKEND_NONE || - mConfig.mBackedType >= MV_INFERENCE_BACKEND_MAX) { - LOGE("NOT SUPPORTED BACKEND %d", mConfig.mBackedType); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - std::string backendName = mSupportedInferenceBackend[mConfig.mBackedType].first; - LOGI("backend string name: %s", backendName.c_str()); - - inference_engine_config config = { - .backend_name = backendName, - .backend_type = mConfig.mBackedType, - // As a default, Target device is CPU. If user defined desired device type in json file - // then the device type will be set by Load callback. - .target_devices = mConfig.mTargetTypes, - }; - - // Create a backend class object. - try { - mBackend = new InferenceEngineCommon(); - } catch (const std::bad_alloc &ex) { - LOGE("Fail to create backend : %s", ex.what()); - return MEDIA_VISION_ERROR_OUT_OF_MEMORY; - } - - int ret = MEDIA_VISION_ERROR_NONE; - - // Load configuration file if a given backend type is mlapi. - if (config.backend_type == MV_INFERENCE_BACKEND_MLAPI) { - ret = mBackend->LoadConfigFile(); - if (ret != INFERENCE_ENGINE_ERROR_NONE) { - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - } - - // Bind a backend library. - ret = mBackend->BindBackend(&config); - if (ret != INFERENCE_ENGINE_ERROR_NONE) { - LOGE("Fail to bind backend library.(%d)", mConfig.mBackedType); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - // Get capacity information from a backend. - ret = mBackend->GetBackendCapacity(&mBackendCapacity); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get backend capacity."); - return ret; - } - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - int Inference::Prepare(void) - { - LOGI("ENTER"); - - mCh = mConfig.mTensorInfo.ch; - mDim = mConfig.mTensorInfo.dim; - mInputSize = - cv::Size(mConfig.mTensorInfo.width, mConfig.mTensorInfo.height); - LOGI("InputSize is %d x %d\n", mInputSize.width, mInputSize.height); - - mDeviation = mConfig.mStdValue; - mMean = mConfig.mMeanValue; - LOGI("mean %.4f, deviation %.4f", mMean, mDeviation); - - mOutputNumbers = mConfig.mMaxOutputNumbers; - LOGI("outputNumber %d", mOutputNumbers); - - mThreshold = mConfig.mConfidenceThresHold; - LOGI("threshold %.4f", mThreshold); - - // Check if backend supports a given target device/devices or not. 
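FillOutputResult() above widens quantized uint8 outputs to float and rescales them into [0, 1] by dividing by 255, so downstream post-processing only ever sees float tensors. The core of that conversion, as a standalone sketch:

#include <cstddef>
#include <cstdio>
#include <vector>

// Widen a quantized uint8 tensor to float in [0, 1].
std::vector<float> DequantizeU8(const std::vector<unsigned char> &in)
{
    std::vector<float> out(in.size());
    for (std::size_t i = 0; i < in.size(); ++i)
        out[i] = static_cast<float>(in[i]) / 255.0f;
    return out;
}

int main()
{
    std::vector<unsigned char> raw = { 0, 128, 255 };
    std::vector<float> scores = DequantizeU8(raw);
    std::printf("%.3f %.3f %.3f\n", scores[0], scores[1], scores[2]);
    return 0;
}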
-	if (mConfig.mTargetTypes & MV_INFERENCE_TARGET_DEVICE_CPU) {
-		if (!(mBackendCapacity.supported_accel_devices & INFERENCE_TARGET_CPU)) {
-			LOGE("Backend doesn't support CPU device as an accelerator.");
-			return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-		}
-	}
-
-	if (mConfig.mTargetTypes & MV_INFERENCE_TARGET_DEVICE_GPU) {
-		if (!(mBackendCapacity.supported_accel_devices & INFERENCE_TARGET_GPU)) {
-			LOGE("Backend doesn't support GPU device as an accelerator.");
-			return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-		}
-	}
-
-	if (mConfig.mTargetTypes & MV_INFERENCE_TARGET_DEVICE_CUSTOM) {
-		if (!(mBackendCapacity.supported_accel_devices & INFERENCE_TARGET_CUSTOM)) {
-			LOGE("Backend doesn't support custom device as an accelerator.");
-			return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-		}
-	}
-
-	mBackend->SetTargetDevices(ConvertTargetTypes(mConfig.mTargetTypes));
-
-	LOGI("LEAVE");
-
-	return MEDIA_VISION_ERROR_NONE;
-}
-
-int Inference::Load(void)
-{
-	LOGI("ENTER");
-
-	std::string label_file = mConfig.mUserFilePath;
-	size_t userFileLength = label_file.length();
-	if (userFileLength > 0 && access(label_file.c_str(), F_OK)) {
-		LOGE("Label file [%s] does not exist.", label_file.c_str());
-		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-	}
-
-	int ret = (userFileLength > 0) ? SetUserFile(label_file) :
-			MEDIA_VISION_ERROR_NONE;
-	if (ret != MEDIA_VISION_ERROR_NONE) {
-		LOGE("Fail to load label file.");
-		return ret;
-	}
-
-	// Check if the model file is valid or not.
-	std::string ext_str = mConfig.mWeightFilePath.substr(
-			mConfig.mWeightFilePath.find_last_of(".") + 1);
-	std::map<std::string, int>::iterator key = mModelFormats.find(ext_str);
-	if (key == mModelFormats.end()) {
-		LOGE("Invalid model file format.(ext = %s)", ext_str.c_str());
-		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
-	}
-
-	LOGI("%s model file has been detected.", ext_str.c_str());
-
-	std::vector<std::string> models;
-
-	inference_model_format_e model_format =
-			static_cast<inference_model_format_e>(key->second);
-
-	// Push model file information to the models vector according to the detected format.
-	switch (model_format) {
-	case INFERENCE_MODEL_CAFFE:
-	case INFERENCE_MODEL_TF:
-	case INFERENCE_MODEL_DARKNET:
-	case INFERENCE_MODEL_DLDT:
-	case INFERENCE_MODEL_ONNX:
-	case INFERENCE_MODEL_VIVANTE:
-		models.push_back(mConfig.mWeightFilePath);
-		models.push_back(mConfig.mConfigFilePath);
-		break;
-	case INFERENCE_MODEL_TFLITE:
-	case INFERENCE_MODEL_TORCH:
-		models.push_back(mConfig.mWeightFilePath);
-		break;
-	default:
-		break;
-	}
-
-	// Request model loading to the backend engine.
-	ret = mBackend->Load(models, model_format);
-	if (ret != INFERENCE_ENGINE_ERROR_NONE) {
-		delete mBackend;
-		LOGE("Fail to load model");
-		mCanRun = false;
-		std::vector<std::string>().swap(models);
-		return ConvertEngineErrorToVisionError(ret);
-	}
-
-	std::vector<std::string>().swap(models);
-
-	// Prepare input and output tensor buffers.
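Prepare() above tests each requested device flag against the backend's capability mask one branch at a time. Because both sides are bitmasks, the same validation collapses into a single subset test; a sketch with illustrative flag values:

#include <cstdio>

enum DeviceFlag { DEV_CPU = 1 << 0, DEV_GPU = 1 << 1, DEV_CUSTOM = 1 << 2 };

// True when every requested device is also present in the supported mask.
bool SupportsAll(unsigned int supported, unsigned int requested)
{
    return (supported & requested) == requested;
}

int main()
{
    unsigned int supported = DEV_CPU | DEV_GPU;
    std::printf("CPU+GPU ok: %d\n", SupportsAll(supported, DEV_CPU | DEV_GPU));  // 1
    std::printf("CUSTOM ok:  %d\n", SupportsAll(supported, DEV_CUSTOM));         // 0
    return 0;
}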
- PrepareTenosrBuffers(); - - mCanRun = true; - - LOGI("LEAVE"); - - return ConvertEngineErrorToVisionError(ret); - } - - int Inference::Run(std::vector &mvSources, - std::vector &rects) - { - int ret = INFERENCE_ENGINE_ERROR_NONE; - - if (!mCanRun) { - LOGE("Invalid to run inference"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - /* convert mv_source to cv::Mat */ - cv::Mat cvSource; - cv::Rect cvRoi; - unsigned int width = 0, height = 0; - unsigned int bufferSize = 0; - unsigned char *buffer = NULL; - - if (mvSources.empty()) { - LOGE("mvSources should contain only one cv source."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - // We are able to request Only one input data for the inference as of now. - if (mvSources.size() > 1) { - LOGE("It allows only one mv source for the inference."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - // TODO. Consider multiple sources. - mv_source_h mvSource = mvSources.front(); - mv_rectangle_s *roi = rects.empty() ? NULL : &(rects.front()); - - mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID; - - if (mv_source_get_width(mvSource, &width) != MEDIA_VISION_ERROR_NONE || - mv_source_get_height(mvSource, &height) != - MEDIA_VISION_ERROR_NONE || - mv_source_get_colorspace(mvSource, &colorspace) != - MEDIA_VISION_ERROR_NONE || - mv_source_get_buffer(mvSource, &buffer, &bufferSize)) - return MEDIA_VISION_ERROR_INTERNAL; - - // TODO. Let's support various color spaces. - - if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) { - LOGE("Not Supported format!\n"); - return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT; - } - - if (roi == NULL) { - cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), - buffer) - .clone(); - } else { - cvRoi.x = roi->point.x; - cvRoi.y = roi->point.y; - cvRoi.width = (roi->point.x + roi->width) >= width ? - width - roi->point.x : - roi->width; - cvRoi.height = (roi->point.y + roi->height) >= height ? - height - roi->point.y : - roi->height; - cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), - buffer)(cvRoi) - .clone(); - } - - LOGI("Size: w:%u, h:%u", cvSource.size().width, cvSource.size().height); - - if (mCh != 1 && mCh != 3) { - LOGE("Channel not supported."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - const InputMetadata& inputMeta = mMetadata.GetInputMeta(); - if (inputMeta.parsed) { - for (auto& buffer : mInputTensorBuffers.getAllTensorBuffer()) { - inference_engine_tensor_buffer& tensor_buffer = buffer.second; - const LayerInfo& layerInfo = inputMeta.layer.at(buffer.first); - const Options& opt = inputMeta.option.empty() ? Options() : inputMeta.option.at(buffer.first); - - int data_type = ConvertToCv(tensor_buffer.data_type); - - // mSourceSize is original input image's size - mSourceSize = cvSource.size(); - // TODO: try-catch{} error handling - ret = mPreProc.Run(cvSource, colorspace, data_type, layerInfo, opt, tensor_buffer.buffer); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to run pre-process."); - return ret; - } - } - } else { - for (auto& buffer : mInputTensorBuffers.getAllTensorBuffer()) { - inference_engine_tensor_buffer& tensor_buffer = buffer.second; - - int data_type = ConvertToCv(tensor_buffer.data_type); - - // Convert color space of input tensor data and then normalize it. 
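Run() above clips the caller's ROI so it never reaches past the source image before taking a cv::Mat view of it. OpenCV's rectangle intersection operator expresses the same clamp in one line; a sketch:

#include <cstdio>
#include <opencv2/core.hpp>

int main()
{
    cv::Mat image(480, 640, CV_8UC3, cv::Scalar(0, 0, 0));

    // An ROI that intentionally sticks out past the right/bottom edges.
    cv::Rect roi(600, 440, 100, 100);

    // operator& intersects the ROI with the image bounds, clamping it.
    cv::Rect clamped = roi & cv::Rect(0, 0, image.cols, image.rows);

    cv::Mat view = image(clamped).clone();  // clone() detaches from the source
    std::printf("clamped roi: %dx%d at (%d, %d)\n",
                view.cols, view.rows, clamped.x, clamped.y);
    return 0;
}

Unlike the hand-written width/height clamp above, the intersection also handles an ROI whose origin lies outside the image.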
- - ret = Preprocess(cvSource, - cv::Mat(mInputSize.height, mInputSize.width, - data_type, tensor_buffer.buffer), - data_type); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to preprocess input tensor data."); - return ret; - } - } - } - - ret = mBackend->Run(mInputTensorBuffers.getAllTensorBuffer(), - mOutputTensorBuffers.getAllTensorBuffer()); - - return ConvertEngineErrorToVisionError(ret); - } - - std::pair - Inference::GetSupportedInferenceBackend(int backend) - { - return mSupportedInferenceBackend[backend]; - } - - int Inference::GetClassficationResults( - ImageClassificationResults *classificationResults) - { - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { - std::vector> topScore; - float value = 0.0f; - auto& info = outputMeta.GetScore(); - - std::vector indexes = info.GetDimInfo().GetValidIndexAll(); - if (indexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - int classes = mOutputLayerProperty.layers[info.GetName()].shape[indexes[0]]; - - if (!mOutputTensorBuffers.exist(info.GetName())) { - LOGE("output buffe is NULL"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - mPostProc.ScoreClear(info.GetTopNumber()); - for (int cId = 0; cId < classes; ++cId) { - try { - value = mOutputTensorBuffers.getValue(info.GetName(), cId); - } catch (const std::exception& e) { - LOGE(" Fail to get getValue with %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - if (info.GetDeQuant()) { - value = PostProcess::dequant(value, - info.GetDeQuant()->GetScale(), - info.GetDeQuant()->GetZeroPoint()); - } - if (info.GetType() == 1) { - value = PostProcess::sigmoid(value); - } - - if (value < info.GetThresHold()) - continue; - - LOGI("id[%d]: %.3f", cId, value); - mPostProc.ScorePush(value, cId); - } - mPostProc.ScorePop(topScore); - - ImageClassificationResults results; - results.number_of_classes = 0; - for (auto& value : topScore) { - LOGI("score: %.3f, threshold: %.3f", value.first, info.GetThresHold()); - LOGI("idx:%d", value.second); - LOGI("classProb: %.3f", value.first); - - results.indices.push_back(value.second); - results.confidences.push_back(value.first); - results.names.push_back(mUserListName[value.second]); - results.number_of_classes++; - } - - *classificationResults = results; - LOGE("Inference: GetClassificationResults: %d\n", - results.number_of_classes); - - } else { - tensor_t outputData; - - // Get inference result and contain it to outputData. - int ret = FillOutputResult(outputData); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output result."); - return ret; - } - - // Will contain top N results in ascending order. - std::vector > top_results; - std::priority_queue, - std::vector >, - std::greater > > - top_result_pq; - float value = 0.0f; - - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - - int count = inferDimInfo[0][1]; - LOGI("count: %d", count); - float *prediction = reinterpret_cast(inferResults[0]); - for (int i = 0; i < count; ++i) { - value = prediction[i]; - - // Only add it if it beats the threshold and has a chance at being in - // the top N. - top_result_pq.push(std::pair(value, i)); - - // If at capacity, kick the smallest value out. - if (top_result_pq.size() > mOutputNumbers) { - top_result_pq.pop(); - } - } - - // Copy to output vector and reverse into descending order. 
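In the metadata-driven classification path above, each raw class score may pass through an affine dequantization (scale and zero point) and an optional sigmoid before the threshold gate. A compact sketch of that scoring pipeline; the quantization parameters are invented, and the exact formula inside PostProcess::dequant may differ:

#include <cmath>
#include <cstdio>

// A common affine dequantization form, assumed here for illustration.
float Dequantize(float v, float scale, float zeroPoint)
{
    return (v - zeroPoint) * scale;
}

float Sigmoid(float v)
{
    return 1.0f / (1.0f + std::exp(-v));
}

int main()
{
    const float scale = 0.05f, zeroPoint = 128.0f, threshold = 0.6f;

    float raw = 180.0f;  // quantized model output
    float score = Sigmoid(Dequantize(raw, scale, zeroPoint));

    if (score >= threshold)
        std::printf("accepted with score %.3f\n", score);
    else
        std::printf("rejected with score %.3f\n", score);
    return 0;
}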
- while (!top_result_pq.empty()) { - top_results.push_back(top_result_pq.top()); - top_result_pq.pop(); - } - std::reverse(top_results.begin(), top_results.end()); - - int classIdx = -1; - ImageClassificationResults results; - results.number_of_classes = 0; - for (int idx = 0; idx < top_results.size(); ++idx) { - if (top_results[idx].first < mThreshold) - continue; - LOGI("idx:%d", idx); - LOGI("classIdx: %d", top_results[idx].second); - LOGI("classProb: %f", top_results[idx].first); - - classIdx = top_results[idx].second; - results.indices.push_back(classIdx); - results.confidences.push_back(top_results[idx].first); - results.names.push_back(mUserListName[classIdx]); - results.number_of_classes++; - } - - *classificationResults = results; - LOGE("Inference: GetClassificationResults: %d\n", - results.number_of_classes); - } - - return MEDIA_VISION_ERROR_NONE; - } - - int Inference::GetObjectDetectionResults( - ObjectDetectionResults *detectionResults) - { - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { - // decoding type - auto& boxInfo = outputMeta.GetBox(); - auto& scoreInfo = outputMeta.GetScore(); - if (!mOutputTensorBuffers.exist(boxInfo.GetName()) || - !mOutputTensorBuffers.exist(scoreInfo.GetName()) ){ - LOGE("output buffers named of %s or %s are NULL", - boxInfo.GetName().c_str(), scoreInfo.GetName().c_str()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - int boxOffset = 0; - int numberOfObjects = 0; - if (boxInfo.GetDecodingType() == 0) { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); - if (boxIndexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; - } else { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); - if (boxIndexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; - - std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); - if (scoreIndexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - numberOfObjects = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]]; - } - - ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset, - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()), - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()), - numberOfObjects); - - objDecoder.init(); - objDecoder.decode(); - ObjectDetectionResults results; - results.number_of_objects = 0; - - for (auto& box : objDecoder.getObjectAll()) { - results.indices.push_back(box.index); - results.names.push_back(mUserListName[box.index]); - results.confidences.push_back(box.score); - results.locations.push_back(cv::Rect( - static_cast((box.location.x - box.location.width * 0.5f) * static_cast(mSourceSize.width)), - static_cast((box.location.y - box.location.height * 0.5f) * static_cast(mSourceSize.height)), - static_cast(box.location.width * static_cast(mSourceSize.width)), - static_cast(box.location.height * static_cast(mSourceSize.height)))); - results.number_of_objects++; - } - *detectionResults = results; - LOGI("Inference: GetObjectDetectionResults: %d\n", - results.number_of_objects); - } else { - tensor_t outputData; - - // Get inference result and contain it to outputData. 
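The fallback classification path above keeps the running top-N candidates in a min-heap: push every score, evict the smallest whenever the heap grows past N, then drain and reverse into descending order. A self-contained sketch of that selection:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <functional>
#include <queue>
#include <utility>
#include <vector>

int main()
{
    const std::size_t kTopN = 3;
    std::vector<float> scores = { 0.12f, 0.80f, 0.05f, 0.64f, 0.91f, 0.33f };

    // Min-heap of (score, class index): the weakest candidate sits on top.
    std::priority_queue<std::pair<float, int>,
                        std::vector<std::pair<float, int>>,
                        std::greater<std::pair<float, int>>> heap;

    for (int i = 0; i < static_cast<int>(scores.size()); ++i) {
        heap.push({ scores[i], i });
        if (heap.size() > kTopN)
            heap.pop();  // evict the smallest, keeping only the top N
    }

    std::vector<std::pair<float, int>> top;
    while (!heap.empty()) {
        top.push_back(heap.top());
        heap.pop();
    }
    std::reverse(top.begin(), top.end());  // descending by score

    for (auto &p : top)
        std::printf("class %d: %.2f\n", p.second, p.first);
    return 0;
}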
- int ret = FillOutputResult(outputData); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output result."); - return ret; - } - - // In case of object detection, - // a model may apply post-process but others may not. - // Thus, those cases should be hanlded separately. - std::vector > inferDimInfo(outputData.dimInfo); - LOGI("inferDimInfo size: %zu", outputData.dimInfo.size()); - - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - LOGI("inferResults size: %zu", inferResults.size()); - - float *boxes = nullptr; - float *classes = nullptr; - float *scores = nullptr; - int number_of_detections = 0; - - cv::Mat cvScores, cvClasses, cvBoxes; - if (outputData.dimInfo.size() == 1) { - // there is no way to know how many objects are detect unless the number of objects aren't - // provided. In the case, each backend should provide the number of results manually. - // For example, in OpenCV, MobilenetV1-SSD doesn't provide it so the number of objects are - // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7 - // indicates the image id. But it is useless if a batch mode isn't supported. - // So, use the 1st of 7. - - number_of_detections = static_cast( - *reinterpret_cast(outputData.data[0])); - cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], - CV_32F, outputData.data[0]); - - // boxes - cv::Mat cvLeft = cvOutputData.col(3).clone(); - cv::Mat cvTop = cvOutputData.col(4).clone(); - cv::Mat cvRight = cvOutputData.col(5).clone(); - cv::Mat cvBottom = cvOutputData.col(6).clone(); - - cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight }; - cv::hconcat(cvBoxElems, 4, cvBoxes); - - // classes - cvClasses = cvOutputData.col(1).clone(); - - // scores - cvScores = cvOutputData.col(2).clone(); - - boxes = cvBoxes.ptr(0); - classes = cvClasses.ptr(0); - scores = cvScores.ptr(0); - - } else { - boxes = reinterpret_cast(inferResults[0]); - classes = reinterpret_cast(inferResults[1]); - scores = reinterpret_cast(inferResults[2]); - number_of_detections = - (int) (*reinterpret_cast(inferResults[3])); - } - - LOGI("number_of_detections = %d", number_of_detections); - - int left, top, right, bottom; - cv::Rect loc; - - ObjectDetectionResults results; - results.number_of_objects = 0; - for (int idx = 0; idx < number_of_detections; ++idx) { - if (scores[idx] < mThreshold) - continue; - - left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); - top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); - right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); - bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); - - loc.x = left; - loc.y = top; - loc.width = right - left + 1; - loc.height = bottom - top + 1; - - results.indices.push_back(static_cast(classes[idx])); - results.confidences.push_back(scores[idx]); - results.names.push_back( - mUserListName[static_cast(classes[idx])]); - results.locations.push_back(loc); - results.number_of_objects++; - - LOGI("objectClass: %d", static_cast(classes[idx])); - LOGI("confidence:%f", scores[idx]); - LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, - bottom); - } - - *detectionResults = results; - LOGI("Inference: GetObjectDetectionResults: %d\n", - results.number_of_objects); - } - - return MEDIA_VISION_ERROR_NONE; - } - - int - Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults) - { - OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); - if (outputMeta.IsParsed()) { - // decoding type - auto& boxInfo = 
outputMeta.GetBox(); - auto& scoreInfo = outputMeta.GetScore(); - if (!mOutputTensorBuffers.exist(boxInfo.GetName()) || - !mOutputTensorBuffers.exist(scoreInfo.GetName())){ - LOGE("output buffers named of %s or %s are NULL", - boxInfo.GetName().c_str(), scoreInfo.GetName().c_str()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - int boxOffset = 0; - int numberOfFaces = 0; - if (boxInfo.GetDecodingType() == 0) { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); - if (boxIndexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; - } else { - std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); - if (boxIndexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; - - std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); - if (scoreIndexes.size() != 1) { - LOGE("Invaid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - numberOfFaces = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]]; - } - - ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset, - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()), - static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()), - numberOfFaces); - - objDecoder.init(); - objDecoder.decode(); - FaceDetectionResults results; - results.number_of_faces = 0; - - for (auto& face : objDecoder.getObjectAll()) { - results.confidences.push_back(face.score); - results.locations.push_back(cv::Rect( - static_cast((face.location.x - face.location.width * 0.5f) * static_cast(mSourceSize.width)), - static_cast((face.location.y - face.location.height * 0.5f) * static_cast(mSourceSize.height)), - static_cast(face.location.width * static_cast(mSourceSize.width)), - static_cast(face.location.height * static_cast(mSourceSize.height)))); - results.number_of_faces++; - } - *detectionResults = results; - LOGE("Inference: GetFaceDetectionResults: %d\n", - results.number_of_faces); - } else { - tensor_t outputData; - - // Get inference result and contain it to outputData. - int ret = FillOutputResult(outputData); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output result."); - return ret; - } - - // In case of object detection, - // a model may apply post-process but others may not. - // Thus, those cases should be handled separately. - std::vector > inferDimInfo(outputData.dimInfo); - LOGI("inferDimInfo size: %zu", outputData.dimInfo.size()); - - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - LOGI("inferResults size: %zu", inferResults.size()); - - float *boxes = nullptr; - float *classes = nullptr; - float *scores = nullptr; - int number_of_detections = 0; - - cv::Mat cvScores, cvClasses, cvBoxes; - if (outputData.dimInfo.size() == 1) { - // there is no way to know how many objects are detect unless the number of objects aren't - // provided. In the case, each backend should provide the number of results manually. - // For example, in OpenCV, MobilenetV1-SSD doesn't provide it so the number of objects are - // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7 - // indicates the image id. But it is useless if a batch mode isn't supported. - // So, use the 1st of 7. 
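The comment above describes the single-blob layout some OpenCV-loaded SSD models emit: one row of seven floats per detection, [image_id, class, score, left, top, right, bottom], with coordinates normalized to [0, 1]. A sketch that walks such rows; the sample values are invented:

#include <cstdio>

int main()
{
    // Two fake detections in the 7-float-per-row SSD layout.
    const float detections[][7] = {
        // image_id, class, score, left,  top,   right, bottom
        { 0.0f,      1.0f,  0.92f, 0.10f, 0.20f, 0.40f, 0.60f },
        { 0.0f,      3.0f,  0.15f, 0.50f, 0.55f, 0.70f, 0.80f },
    };
    const int imageWidth = 640, imageHeight = 480;
    const float threshold = 0.5f;

    for (const float *row : detections) {
        if (row[2] < threshold)
            continue;  // score below threshold

        int left   = static_cast<int>(row[3] * imageWidth);
        int top    = static_cast<int>(row[4] * imageHeight);
        int right  = static_cast<int>(row[5] * imageWidth);
        int bottom = static_cast<int>(row[6] * imageHeight);
        std::printf("class %d score %.2f box (%d,%d)-(%d,%d)\n",
                    static_cast<int>(row[1]), row[2], left, top, right, bottom);
    }
    return 0;
}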
- - number_of_detections = static_cast( - *reinterpret_cast(outputData.data[0])); - cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], - CV_32F, outputData.data[0]); - - // boxes - cv::Mat cvLeft = cvOutputData.col(3).clone(); - cv::Mat cvTop = cvOutputData.col(4).clone(); - cv::Mat cvRight = cvOutputData.col(5).clone(); - cv::Mat cvBottom = cvOutputData.col(6).clone(); - - cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight }; - cv::hconcat(cvBoxElems, 4, cvBoxes); - - // classes - cvClasses = cvOutputData.col(1).clone(); - - // scores - cvScores = cvOutputData.col(2).clone(); - - boxes = cvBoxes.ptr(0); - classes = cvClasses.ptr(0); - scores = cvScores.ptr(0); - - } else { - boxes = reinterpret_cast(inferResults[0]); - classes = reinterpret_cast(inferResults[1]); - scores = reinterpret_cast(inferResults[2]); - number_of_detections = static_cast( - *reinterpret_cast(inferResults[3])); - } - - int left, top, right, bottom; - cv::Rect loc; - - FaceDetectionResults results; - results.number_of_faces = 0; - for (int idx = 0; idx < number_of_detections; ++idx) { - if (scores[idx] < mThreshold) - continue; - - left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); - top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); - right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); - bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); - - loc.x = left; - loc.y = top; - loc.width = right - left + 1; - loc.height = bottom - top + 1; - - results.confidences.push_back(scores[idx]); - results.locations.push_back(loc); - results.number_of_faces++; - - LOGI("confidence:%f", scores[idx]); - LOGI("class: %f", classes[idx]); - LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx * 4 + 1], - boxes[idx * 4 + 0], boxes[idx * 4 + 3], boxes[idx * 4 + 2]); - LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, - bottom); - } - - *detectionResults = results; - LOGE("Inference: GetFaceDetectionResults: %d\n", - results.number_of_faces); - } - - return MEDIA_VISION_ERROR_NONE; - } - - int Inference::GetFacialLandMarkDetectionResults( - FacialLandMarkDetectionResults *detectionResults) - { - tensor_t outputData; - - // Get inference result and contain it to outputData. - int ret = FillOutputResult(outputData); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output result."); - return ret; - } - - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - - long number_of_detections = inferDimInfo[0][1]; - float *loc = reinterpret_cast(inferResults[0]); - - FacialLandMarkDetectionResults results; - results.number_of_landmarks = 0; - - cv::Point point(0, 0); - results.number_of_landmarks = 0; - LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); - for (int idx = 0; idx < number_of_detections; idx += 2) { - point.x = static_cast(loc[idx] * mSourceSize.width); - point.y = static_cast(loc[idx + 1] * mSourceSize.height); - - results.locations.push_back(point); - results.number_of_landmarks++; - - LOGI("x:%d, y:%d", point.x, point.y); - } - - *detectionResults = results; - LOGE("Inference: FacialLandmarkDetectionResults: %d\n", - results.number_of_landmarks); - return MEDIA_VISION_ERROR_NONE; - } - - int Inference::GetPoseLandmarkDetectionResults( - mv_inference_pose_result_h *detectionResults, int width, int height) - { - tensor_t outputData; - - // Get inference result and contain it to outputData. 
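GetFacialLandMarkDetectionResults() above reads the output tensor as interleaved (x, y) pairs normalized to the input size and scales them back to source-image pixels. The same walk in isolation:

#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
    // Interleaved normalized landmark coordinates: x0, y0, x1, y1, ...
    std::vector<float> loc = { 0.25f, 0.30f, 0.75f, 0.30f, 0.50f, 0.65f };
    const int srcWidth = 640, srcHeight = 480;

    for (std::size_t i = 0; i + 1 < loc.size(); i += 2) {
        int x = static_cast<int>(loc[i] * srcWidth);
        int y = static_cast<int>(loc[i + 1] * srcHeight);
        std::printf("landmark %zu: (%d, %d)\n", i / 2, x, y);
    }
    return 0;
}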
- int ret = FillOutputResult(outputData); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output result."); - return ret; - } - - std::vector > inferDimInfo(outputData.dimInfo); - std::vector inferResults(outputData.data.begin(), - outputData.data.end()); - - long number_of_poses = 1; - long number_of_landmarks = inferDimInfo[0][3]; - float *tmp = static_cast(inferResults[0]); - cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]); - - cv::Point loc; - cv::Point2f loc2f; - double score; - cv::Mat blurredHeatMap; - - cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), - CV_32FC(inferDimInfo[0][3]), (void *) tmp); - - cv::Mat multiChannels[inferDimInfo[0][3]]; - split(reShapeTest, multiChannels); - - float ratioX = static_cast(inferDimInfo[0][2]); - float ratioY = static_cast(inferDimInfo[0][1]); - - if (mPoseResult == NULL) { - if(!mUserListName.empty()) { - number_of_landmarks = mUserListName.size(); - } - mPoseResult = new mv_inference_pose_s; - if (mPoseResult == NULL) { - LOGE("Fail to create result handle"); - return MEDIA_VISION_ERROR_INTERNAL; - } - - mPoseResult->number_of_poses= number_of_poses; - mPoseResult->number_of_landmarks_per_pose = number_of_landmarks; - mPoseResult->landmarks = new mv_inference_landmark_s*[number_of_poses]; - for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { - mPoseResult->landmarks[poseIndex] = new mv_inference_landmark_s[number_of_landmarks]; - for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; ++landmarkIndex) { - mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = false; - mPoseResult->landmarks[poseIndex][landmarkIndex].point.x = -1; - mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = -1; - mPoseResult->landmarks[poseIndex][landmarkIndex].label = -1; - mPoseResult->landmarks[poseIndex][landmarkIndex].score = -1.0f; - } - } - } - - int part = 0; - for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { - for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { - part = landmarkIndex; - if (!mUserListName.empty()) { - part = std::stoi(mUserListName[landmarkIndex]) - 1; - if (part < 0) { - continue; - } - } - - cv::Mat heatMap = multiChannels[part]; - - cv::GaussianBlur(heatMap, blurredHeatMap, cv::Size(), 5.0, 5.0); - cv::minMaxLoc(heatMap, NULL, &score, NULL, &loc); - - loc2f.x = (static_cast(loc.x) / ratioX); - loc2f.y = (static_cast(loc.y) / ratioY); - LOGI("landmarkIndex[%2d] - mapping to [%2d]: x[%.3f], y[%.3f], score[%.3f]", - landmarkIndex, part, loc2f.x, loc2f.y, score); - - mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = true; - mPoseResult->landmarks[poseIndex][landmarkIndex].point.x = static_cast(static_cast(width) * loc2f.x); - mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = static_cast(static_cast(height) * loc2f.y); - mPoseResult->landmarks[poseIndex][landmarkIndex].score = score; - mPoseResult->landmarks[poseIndex][landmarkIndex].label = -1; - } - } - - *detectionResults = static_cast(mPoseResult); - - return MEDIA_VISION_ERROR_NONE; - } - -} /* Inference */ -} /* MediaVision */ diff --git a/mv_inference/inference/src/InferenceIni.cpp b/mv_inference/inference/src/InferenceIni.cpp deleted file mode 100644 index 7dc5fa51..00000000 --- a/mv_inference/inference/src/InferenceIni.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/** - * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file 
except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -#include "mv_private.h" -#include "InferenceIni.h" - -namespace mediavision -{ -namespace inference -{ - const std::string INFERENCE_INI_FILENAME = - "/inference/inference_engine.ini"; - - static inline std::string &rtrim(std::string &s, - const char *t = " \t\n\r\f\v") - { - s.erase(s.find_last_not_of(t) + 1); - return s; - } - - static inline std::string <rim(std::string &s, - const char *t = " \t\n\r\f\v") - { - s.erase(s.find_first_not_of(t) + 1); - return s; - } - - static inline std::string &trim(std::string &s, - const char *t = " \t\n\r\f\v") - { - return ltrim(rtrim(s, t), t); - } - - InferenceInI::InferenceInI() - : mIniDefaultPath(SYSCONFDIR) - , mDefaultBackend("OPENCV") - , mDelimeter(",") - { - mIniDefaultPath += INFERENCE_INI_FILENAME; - } - - InferenceInI::~InferenceInI() - {} - - int InferenceInI::LoadInI() - { - LOGI("ENTER"); - dictionary *dict = iniparser_load(mIniDefaultPath.c_str()); - if (dict == NULL) { - LOGE("Fail to load ini"); - return -1; - } - - std::string list = std::string(iniparser_getstring( - dict, "inference backend:supported backend types", - (char *) mDefaultBackend.c_str())); - - size_t pos = 0; - while ((pos = list.find(mDelimeter)) != std::string::npos) { - std::string tmp = list.substr(0, pos); - mSupportedInferenceBackend.push_back(atoi(tmp.c_str())); - - list.erase(0, pos + mDelimeter.length()); - } - mSupportedInferenceBackend.push_back(atoi(list.c_str())); - - if (dict) { - iniparser_freedict(dict); - dict = NULL; - } - - LOGI("LEAVE"); - return 0; - } - - void InferenceInI::UnLoadInI() - { - ; - } - - std::vector InferenceInI::GetSupportedInferenceEngines() - { - return mSupportedInferenceBackend; - } - -} /* Inference */ -} /* MediaVision */ diff --git a/mv_inference/inference/src/InputMetadata.cpp b/mv_inference/inference/src/InputMetadata.cpp deleted file mode 100644 index 28edbe34..00000000 --- a/mv_inference/inference/src/InputMetadata.cpp +++ /dev/null @@ -1,234 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "mv_private.h" - -#include -#include -#include -#include -#include -#include "InputMetadata.h" -#include - -namespace mediavision -{ -namespace inference -{ - mv_colorspace_e InputMetadata::ConvertTypeToMD(const std::string& type) - { - mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID; - if (type.empty()) { - LOGE("Invalid type[null]"); - return colorspace; - } - - if (type.compare("RGB888") == 0) { - colorspace = MEDIA_VISION_COLORSPACE_RGB888; - } else if (type.compare("Y800") == 0) { - colorspace = MEDIA_VISION_COLORSPACE_Y800; - } else { - LOGE("Not supported channel type"); - } - - return colorspace; - } - - int InputMetadata::GetTensorInfo(JsonObject *root) - { - LOGI("ENTER"); - - if (json_object_has_member(root, "tensor_info") == false) { - LOGE("No tensor_info inputmetadata"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - // tensor_info - JsonArray * rootArray = json_object_get_array_member(root, "tensor_info"); - unsigned int elements = json_array_get_length(rootArray); - - std::map().swap(layer); - // TODO: handling error - // FIXEME: LayerInfo.set()?? - for (unsigned int elem = 0; elem < elements; ++elem) { - LayerInfo info; - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - info.name = - static_cast(json_object_get_string_member(pObject,"name")); - LOGI("layer: %s", info.name.c_str()); - - info.shapeType = - static_cast(json_object_get_int_member(pObject, "shape_type")); - LOGI("shape type: %d:%s", info.shapeType, info.shapeType == 0 ? "NCHW" : "NHWC"); - - info.dataType = - static_cast(json_object_get_int_member(pObject, "data_type")); - LOGI("data type : %d:%s", info.dataType, info.dataType == 0 ? "FLOAT32" : "UINT8"); - - const char *colorSpace = static_cast(json_object_get_string_member(pObject,"color_space")); - info.colorSpace = ConvertTypeToMD(std::string(colorSpace)); - LOGI("color space : %d:%s", info.colorSpace, info.colorSpace == MEDIA_VISION_COLORSPACE_RGB888 ? "RGB888" : ""); - - // dims - JsonArray * array = json_object_get_array_member(pObject, "shape_dims"); - unsigned int elements2 = json_array_get_length(array); - LOGI("shape dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - auto dim = static_cast(json_array_get_int_element(array, elem2)); - info.dims.push_back(dim); - LOGI("%d", dim); - } - - layer.insert(std::make_pair(info.name, info)); - } - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - int InputMetadata::GetPreProcess(JsonObject *root) - { - LOGI("ENTER"); - - if (json_object_has_member(root, "preprocess") == false) { - LOGI("No preprocess inputmetadata"); - return MEDIA_VISION_ERROR_NONE; - } - - // preprocess - JsonArray * rootArray = json_object_get_array_member(root, "preprocess"); - unsigned int elements = json_array_get_length(rootArray); - - std::map().swap(option); - // TODO: iterLayer should be the same with elements. 
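GetTensorInfo() above shows the json-glib idiom this parser uses throughout: fetch an array member, take its length, and pull typed members out of each element object. A minimal standalone sketch of the same idiom over an invented document:

#include <cstdio>
#include <json-glib/json-glib.h>

int main()
{
    const char *doc =
        "{ \"tensor_info\": [ { \"name\": \"input\", \"shape_type\": 1 } ] }";

    JsonParser *parser = json_parser_new();
    if (!json_parser_load_from_data(parser, doc, -1, NULL)) {
        std::printf("parse failed\n");
        g_object_unref(parser);
        return 1;
    }

    JsonObject *root = json_node_get_object(json_parser_get_root(parser));
    JsonArray *infos = json_object_get_array_member(root, "tensor_info");

    for (guint i = 0; i < json_array_get_length(infos); ++i) {
        JsonObject *info = json_array_get_object_element(infos, i);
        std::printf("layer %s, shape_type %d\n",
                    json_object_get_string_member(info, "name"),
                    (int) json_object_get_int_member(info, "shape_type"));
    }

    g_object_unref(parser);
    return 0;
}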
- auto iterLayer = layer.begin(); - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem, ++iterLayer) { - Options opt; - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - // normalization - if (json_object_has_member(pObject, "normalization")) { - JsonArray * array = json_object_get_array_member(pObject, "normalization"); - JsonNode * node = json_array_get_element(array, 0); - JsonObject * object = json_node_get_object(node); - - opt.normalization.use = true; - LOGI("use normalization"); - - JsonArray * arrayMean = json_object_get_array_member(object, "mean"); - JsonArray * arrayStd = json_object_get_array_member(object, "std"); - unsigned int elemMean = json_array_get_length(arrayMean); - unsigned int elemStd = json_array_get_length(arrayStd); - if (elemMean != elemStd) { - LOGE("Invalid mean and std values"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - for (unsigned int elem = 0; elem < elemMean; ++elem) { - auto m = static_cast(json_array_get_double_element(arrayMean, elem)); - auto s = static_cast(json_array_get_double_element(arrayStd, elem)); - opt.normalization.mean.push_back(m); - opt.normalization.std.push_back(s); - LOGI("%u: mean[%3.2f], std[%3.2f]", elem, m, s); - } - } - - if (json_object_has_member(pObject, "quantization")) { - JsonArray * array = json_object_get_array_member(pObject, "quantization"); - JsonNode * node = json_array_get_element(array, 0); - JsonObject * object = json_node_get_object(node); - - opt.quantization.use = true; - LOGI("use quantization"); - - JsonArray * arrayScale = json_object_get_array_member(object, "scale"); - JsonArray * arrayZero = json_object_get_array_member(object, "zeropoint"); - unsigned int elemScale = json_array_get_length(arrayScale); - unsigned int elemZero= json_array_get_length(arrayZero); - if (elemScale != elemZero) { - LOGE("Invalid scale and zero values"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - for (unsigned int elem = 0; elem < elemScale; ++elem) { - auto s = static_cast(json_array_get_double_element(arrayScale, elem)); - auto z = static_cast(json_array_get_double_element(arrayZero, elem)); - opt.quantization.scale.push_back(s); - opt.quantization.zeropoint.push_back(z); - LOGI("%u: scale[%3.2f], zeropoint[%3.2f]", elem, s, z); - } - } - option.insert(std::make_pair(iterLayer->first, opt)); - } - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - int InputMetadata::Parse(JsonObject *root) - { - LOGI("ENTER"); - - int ret = GetTensorInfo(root); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GetTensorInfo[%d]", ret); - return ret; - } - - ret = GetPreProcess(root); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GetPreProcess[%d]", ret); - return ret; - } - - parsed = true; - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - int LayerInfo::GetWidth() const { - if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { - return dims[3]; - } else { // INFERENCE_TENSOR_SHAPE_NWHC - return dims[1]; - } - } - - int LayerInfo::GetHeight() const { - if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { - return dims[2]; - } else { // INFERENCE_TENSOR_SHAPE_NWHC - return dims[2]; - } - } - - int LayerInfo::GetChannel() const { - if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { - return dims[1]; - } else { // INFERENCE_TENSOR_SHAPE_NWHC - return dims[3]; - } - } - -} /* Inference */ -} /* MediaVision */ diff --git a/mv_inference/inference/src/Metadata.cpp b/mv_inference/inference/src/Metadata.cpp 
deleted file mode 100644 index b2ae9ffd..00000000 --- a/mv_inference/inference/src/Metadata.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "mv_private.h" -#include "Metadata.h" - -#include - -#include -#include -#include -#include -#include - -namespace mediavision -{ -namespace inference -{ - int Metadata::Init(const std::string& filename) - { - LOGI("ENTER"); - - if (access(filename.c_str(), F_OK | R_OK)) { - LOGE("meta file is in [%s] ", filename.c_str()); - return MEDIA_VISION_ERROR_INVALID_PATH; - } - - mMetafile = filename; - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - int Metadata::Parse() - { - LOGI("ENTER"); - - if (mMetafile.empty()) { - LOGE("meta file is empty"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - GError *error = NULL; - JsonNode *node = NULL; - JsonObject *object = NULL; - int ret = MEDIA_VISION_ERROR_NONE; - - JsonParser *parser = json_parser_new(); - if (parser == NULL) { - LOGE("Fail to create json parser"); - return MEDIA_VISION_ERROR_OUT_OF_MEMORY; - } - - gboolean jsonRet = json_parser_load_from_file(parser, mMetafile.c_str(), &error); - if (!jsonRet) { - LOGE("Unable to parser file %s by %s", mMetafile.c_str(), - error == NULL ? "Unknown" : error->message); - g_error_free(error); - ret = MEDIA_VISION_ERROR_INVALID_DATA; - goto _ERROR_; - } - - node = json_parser_get_root(parser); - if (JSON_NODE_TYPE(node) != JSON_NODE_OBJECT) { - LOGE("Fail to json_parser_get_root. It's an incorrect markup"); - ret = MEDIA_VISION_ERROR_INVALID_DATA; - goto _ERROR_; - } - - object = json_node_get_object(node); - if (!object) { - LOGE("Fail to json_node_get_object. object is NULL"); - ret = MEDIA_VISION_ERROR_INVALID_DATA; - goto _ERROR_; - } - - ret = mInputMeta.Parse(json_object_get_object_member(object, "inputmetadata")); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to parse input Meta[%d]",ret); - goto _ERROR_; - } - - ret = mOutputMeta.Parse(json_object_get_object_member(object, "outputmetadata")); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to parse output meta[%d]",ret); - goto _ERROR_; - } - - _ERROR_ : - g_object_unref(parser); - parser = NULL; - LOGI("LEAVE"); - - return ret; - } - - InputMetadata& Metadata::GetInputMeta() - { - return mInputMeta; - } - - OutputMetadata& Metadata::GetOutputMeta() - { - return mOutputMeta; - } -} /* Inference */ -} /* MediaVision */ diff --git a/mv_inference/inference/src/ObjectDecoder.cpp b/mv_inference/inference/src/ObjectDecoder.cpp deleted file mode 100755 index b4da65f6..00000000 --- a/mv_inference/inference/src/ObjectDecoder.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "mv_private.h" -#include "ObjectDecoder.h" - -#include -#include -#include - -namespace mediavision -{ -namespace inference -{ - int ObjectDecoder::init() - { - if (mBoxInfo.GetDecodingType() == 0) { - if (!mTensorBuffer.exist(mMeta.GetLabel().GetName()) || - !mTensorBuffer.exist(mMeta.GetNumber().GetName()) ) { - LOGE("buffer buffers named of %s or %s are NULL", - mMeta.GetLabel().GetName().c_str(), - mMeta.GetNumber().GetName().c_str()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - std::vector indexes = mMeta.GetNumber().GetDimInfo().GetValidIndexAll(); - if (indexes.size() != 1) { - LOGE("Invalid dim size. It should be 1"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - // When GetDecodingType() == 0, the mNumberOfObjects should be set again. - // otherwise, it is set already within ctor. - mNumberOfOjects = mTensorBuffer.getValue( - mMeta.GetNumber().GetName(), indexes[0]); - } else { - if (mBoxInfo.GetDecodeInfo().IsAnchorBoxEmpty()) { - LOGE("Anchor boxes are required but empty."); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - } - - return MEDIA_VISION_ERROR_NONE; - } - - float ObjectDecoder::decodeScore(int idx) - { - float score = mTensorBuffer.getValue(mScoreInfo.GetName(), idx); - if (mScoreInfo.GetType() == 1) { - score = PostProcess::sigmoid(score); - } - - return score < mScoreInfo.GetThresHold() ? 0.0f : score; - } - - Box ObjectDecoder::decodeBox(int idx, float score, int label) - { - // assume type is (cx,cy,w,h) - // left or cx - float cx = mTensorBuffer.getValue(mBoxInfo.GetName(), - idx * mBoxOffset + mBoxInfo.GetOrder()[0]); - // top or cy - float cy = mTensorBuffer.getValue(mBoxInfo.GetName(), - idx * mBoxOffset + mBoxInfo.GetOrder()[1]); - // right or width - float cWidth = mTensorBuffer.getValue(mBoxInfo.GetName(), - idx * mBoxOffset + mBoxInfo.GetOrder()[2]); - // bottom or height - float cHeight = mTensorBuffer.getValue(mBoxInfo.GetName(), - idx * mBoxOffset + mBoxInfo.GetOrder()[3]); - - // when GetType() == 0 (l,t,r,b), then convert it to (cx,cy,w,h) - if (mBoxInfo.GetType() == 0) { - float tmpCx = cx; - float tmpCy = cy; - cx = (cx + cWidth) * 0.5f; // (left + right)/2 - cy = (cy + cHeight) * 0.5f; // (top + bottom)/2 - cWidth = cWidth - tmpCx ; // right - left - cHeight = cHeight - tmpCy; // bottom - top - } - - // normalize if GetCoordinate() == 1 which is pixel coordinate (absolute) - if (mBoxInfo.GetCoordinate() == 1) { - cx /= mScaleW; - cy /= mScaleH; - cWidth /= mScaleW; - cHeight /= mScaleH; - } - - Box box = { - .index = mMeta.GetLabel().GetName().empty() ? 
- label : - mTensorBuffer.getValue(mMeta.GetLabel().GetName(), idx), - .score = score, - .location = cv::Rect2f(cx, cy, cWidth, cHeight) - }; - - return box; - } - - Box ObjectDecoder::decodeBoxWithAnchor(int idx, int anchorIdx, float score, cv::Rect2f& anchor) - { - // location coordinate of box, the output of decodeBox(), is relative between 0 ~ 1 - Box box = decodeBox(anchorIdx, score, idx); - - if (mBoxInfo.GetDecodeInfo().IsFixedAnchorSize()) { - box.location.x += anchor.x; - box.location.y += anchor.y; - } else { - box.location.x = box.location.x / mBoxInfo.GetDecodeInfo().GetAnchorXscale() * - anchor.width + anchor.x; - box.location.y = box.location.y / mBoxInfo.GetDecodeInfo().GetAnchorYscale() * - anchor.height + anchor.y; - } - - if (mBoxInfo.GetDecodeInfo().IsExponentialBoxScale()) { - box.location.width = anchor.width * - std::exp(box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale()); - box.location.height = anchor.height * - std::exp(box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale()); - } else { - box.location.width = anchor.width * - box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale(); - box.location.height = anchor.height * - box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale(); - } - - return box; - } - - int ObjectDecoder::decode() - { - BoxesList boxList; - int ret = MEDIA_VISION_ERROR_NONE; - - for (int idx = 0; idx < mNumberOfOjects; ++idx) { - if (mBoxInfo.GetType() == 0) { - float score = decodeScore(idx); - if (score <= 0.0f) - continue; - - Box box = decodeBox(idx, score); - mResultBoxes.push_back(box); - } else { - int anchorIdx = -1; - - Boxes boxes; - for (auto& anchorBox : mBoxInfo.GetDecodeInfo().GetAnchorBoxAll()) { - anchorIdx++; - - float score = decodeScore(anchorIdx * mNumberOfOjects + idx); - - if (score <= 0.0f) - continue; - - Box box = decodeBoxWithAnchor(idx, anchorIdx, score, anchorBox); - boxes.push_back(box); - } - boxList.push_back(boxes); - } - } - - if (!boxList.empty()) { - PostProcess postProc; - ret = postProc.Nms(boxList, - mBoxInfo.GetDecodeInfo().GetNmsMode(), - mBoxInfo.GetDecodeInfo().GetNmsThreshold(), - mResultBoxes); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to non-maximum suppression[%d]", ret); - return ret; - } - } - - return ret; - } - - Boxes& ObjectDecoder::getObjectAll() - { - return mResultBoxes; - } -} -} diff --git a/mv_inference/inference/src/OutputMetadata.cpp b/mv_inference/inference/src/OutputMetadata.cpp deleted file mode 100755 index cbfe6ad6..00000000 --- a/mv_inference/inference/src/OutputMetadata.cpp +++ /dev/null @@ -1,612 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "mv_private.h" - -#include -#include -#include -#include -#include -#include "OutputMetadata.h" - -namespace mediavision -{ -namespace inference -{ - int ScoreInfo::ParseScore(JsonObject *root) - { - LOGI("ENTER"); - - JsonArray * rootArray = json_object_get_array_member(root, "score"); - unsigned int elements = json_array_get_length(rootArray); - - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - - topNumber = static_cast(json_object_get_int_member(pObject, "top_number")); - LOGI("top number: %d", topNumber); - - threshold = static_cast(json_object_get_double_member(pObject, "threshold")); - LOGI("threshold: %1.3f", threshold); - - type = static_cast(json_object_get_int_member(pObject, "score_type")); - LOGI("score type: %d", type); - - if (json_object_has_member(pObject, "dequantization")) { - array = json_object_get_array_member(pObject, "dequantization"); - JsonNode *node = json_array_get_element(array, 0); - JsonObject *object = json_node_get_object(node); - - deQuantization = std::make_shared( - json_object_get_double_member(object, "scale"), - json_object_get_double_member(object, "zeropoint")); - } - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseScore(JsonObject *root) - { - LOGI("ENTER"); - - if (json_object_has_member(root, "score") == false) { - LOGI("No score outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - score.ParseScore(root); - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int BoxInfo::ParseBox(JsonObject *root) - { - LOGI("ENTER"); - - JsonArray * rootArray = json_object_get_array_member(root, "box"); - unsigned int elements = json_array_get_length(rootArray); - - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - - type = static_cast(json_object_get_int_member(pObject, "box_type")); - LOGI("box type: %d", type); - - array = json_object_get_array_member(pObject, "box_order"); - elements2 = json_array_get_length(array); - LOGI("box order should have 4 elements and it has [%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - auto val = static_cast(json_array_get_int_element(array, elem2)); - order.push_back(val); - LOGI("%d", val); - } - - coordinate = static_cast(json_object_get_int_member(pObject, "box_coordinate")); - LOGI("box coordinate: %d", coordinate); - - decodingType = static_cast(json_object_get_int_member(pObject, "decoding_type")); - LOGI("box decodeing type: 
%d", decodingType); - } - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - void BoxInfo::DecodeInfo::AddAnchorBox(cv::Rect2f& anchor) - { - anchorBoxes.push_back(anchor); - } - - void BoxInfo::DecodeInfo::ClearAnchorBox() - { - anchorBoxes.clear(); - } - - std::vector& BoxInfo::DecodeInfo::GetAnchorBoxAll() - { - return anchorBoxes; - } - - bool BoxInfo::DecodeInfo::IsAnchorBoxEmpty() - { - return anchorBoxes.empty(); - } - - int OutputMetadata::ParseBox(JsonObject *root) - { - LOGI("ENTER"); - - if (json_object_has_member(root, "box") == false) { - LOGE("No box outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - box.ParseBox(root); - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int Label::ParseLabel(JsonObject *root) - { - LOGI("ENTER"); - - JsonArray * rootArray = json_object_get_array_member(root, "label"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - } - - LOGI("LEAVEL"); - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseLabel(JsonObject *root) - { - LOGI("ENTER"); - - if (json_object_has_member(root, "label") == false) { - LOGE("No box outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - label.ParseLabel(root); - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int Number::ParseNumber(JsonObject *root) - { - // box - JsonArray * rootArray = json_object_get_array_member(root, "number"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < elements; ++elem) { - JsonNode *pNode = json_array_get_element(rootArray, elem); - JsonObject *pObject = json_node_get_object(pNode); - - name = json_object_get_string_member(pObject,"name"); - LOGI("layer: %s", name.c_str()); - - JsonArray * array = json_object_get_array_member(pObject, "index"); - unsigned int elements2 = json_array_get_length(array); - LOGI("range dim: size[%u]", elements2); - for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { - if (static_cast(json_array_get_int_element(array, elem2)) == 1) - dimInfo.SetValidIndex(elem2); - } - } - - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseNumber(JsonObject *root) - { - LOGI("ENTER"); - - if (json_object_has_member(root, "number") == false) { - LOGE("No number outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - number.ParseNumber(root); - - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - int OutputMetadata::ParseBoxDecodeInfo(JsonObject *root) - { - LOGI("ENTER"); - - if (json_object_has_member(root, "box") == false) { - LOGE("No box outputmetadata"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - // box - JsonArray * rootArray = json_object_get_array_member(root, "box"); - unsigned int elements = json_array_get_length(rootArray); - - // TODO: handling error - for (unsigned int elem = 0; elem < 
elements; ++elem) {
-		JsonNode *pNode = json_array_get_element(rootArray, elem);
-		JsonObject *pObject = json_node_get_object(pNode);
-
-		if (json_object_has_member(pObject, "decoding_info") == false) {
-			LOGE("decoding_info is mandatory. Invalid metadata");
-			LOGI("LEAVE");
-
-			return MEDIA_VISION_ERROR_INVALID_OPERATION;
-		}
-
-		JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info");
-		if (json_object_has_member(cObject, "anchor") == false) {
-			LOGE("anchor is mandatory. Invalid metadata");
-			LOGI("LEAVE");
-
-			return MEDIA_VISION_ERROR_INVALID_OPERATION;
-		}
-
-		int ret = box.GetDecodeInfo().ParseAnchorParam(cObject);
-		if (ret != MEDIA_VISION_ERROR_NONE) {
-			LOGE("Fail to ParseAnchorParam[%d]", ret);
-			return ret;
-		}
-
-		ret = box.GetDecodeInfo().ParseNms(cObject);
-		if (ret != MEDIA_VISION_ERROR_NONE) {
-			LOGE("Fail to ParseNms[%d]", ret);
-			return ret;
-		}
-	}
-
-	LOGI("LEAVE");
-	return MEDIA_VISION_ERROR_NONE;
-}
-
-int BoxInfo::DecodeInfo::ParseAnchorParam(JsonObject *root)
-{
-	JsonObject *object = json_object_get_object_member(root, "anchor");
-
-	this->anchorParam.mode = static_cast<int>(json_object_get_int_member(object, "mode"));
-
-	this->anchorParam.numLayers = static_cast<int>(json_object_get_int_member(object, "num_layers"));
-	this->anchorParam.minScale = static_cast<float>(json_object_get_double_member(object, "min_scale"));
-	this->anchorParam.maxScale = static_cast<float>(json_object_get_double_member(object, "max_scale"));
-	this->anchorParam.inputSizeHeight = static_cast<int>(json_object_get_int_member(object, "input_size_height"));
-	this->anchorParam.inputSizeWidth = static_cast<int>(json_object_get_int_member(object, "input_size_width"));
-	this->anchorParam.anchorOffsetX = static_cast<float>(json_object_get_double_member(object, "anchor_offset_x"));
-	this->anchorParam.anchorOffsetY = static_cast<float>(json_object_get_double_member(object, "anchor_offset_y"));
-	this->anchorParam.isReduceBoxedInLowestLayer =
-			static_cast<bool>(json_object_get_boolean_member(object, "reduce_boxed_in_lowest_layer"));
-	this->anchorParam.interpolatedScaleAspectRatio =
-			static_cast<float>(json_object_get_double_member(object, "interpolated_scale_aspect_ratio"));
-	this->anchorParam.isFixedAnchorSize =
-			static_cast<bool>(json_object_get_boolean_member(object, "fixed_anchor_size"));
-	this->anchorParam.isExponentialBoxScale =
-			static_cast<bool>(json_object_get_boolean_member(object, "exponential_box_scale"));
-
-	this->anchorParam.xScale = static_cast<float>(json_object_get_double_member(object, "x_scale"));
-	this->anchorParam.yScale = static_cast<float>(json_object_get_double_member(object, "y_scale"));
-	this->anchorParam.wScale = static_cast<float>(json_object_get_double_member(object, "w_scale"));
-	this->anchorParam.hScale = static_cast<float>(json_object_get_double_member(object, "h_scale"));
-
-	JsonArray *array = json_object_get_array_member(object, "strides");
-	unsigned int elements2 = json_array_get_length(array);
-	for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
-		auto stride = static_cast<int>(json_array_get_int_element(array, elem2));
-		this->anchorParam.strides.push_back(stride);
-		LOGI("stride: %d", stride);
-	}
-
-	array = json_object_get_array_member(object, "aspect_ratios");
-	elements2 = json_array_get_length(array);
-	for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
-		auto aspectRatio = static_cast<float>(json_array_get_double_element(array, elem2));
-		this->anchorParam.aspectRatios.push_back(aspectRatio);
-		LOGI("aspectRatio: %.4f", aspectRatio);
-	}
-
-	return MEDIA_VISION_ERROR_NONE;
-}
-
-float
BoxInfo::DecodeInfo::CalculateScale(float min, float max, int index, int maxStride) - { - return min + (max - min) * 1.0 * index / (maxStride - 1.0f); - } - - bool BoxInfo::DecodeInfo::IsFixedAnchorSize() - { - return this->anchorParam.isFixedAnchorSize;; - } - - bool BoxInfo::DecodeInfo::IsExponentialBoxScale() - { - return this->anchorParam.isExponentialBoxScale; - } - - float BoxInfo::DecodeInfo::GetAnchorXscale() - { - return this->anchorParam.xScale; - } - - float BoxInfo::DecodeInfo::GetAnchorYscale() - { - return this->anchorParam.yScale; - } - - float BoxInfo::DecodeInfo::GetAnchorWscale() - { - return this->anchorParam.wScale; - } - - float BoxInfo::DecodeInfo::GetAnchorHscale() - { - return this->anchorParam.hScale; - } - - int BoxInfo::DecodeInfo::GenerateAnchor() - { - //BoxInfo::DecodeInfo& decodeInfo = box.GetDecodeInfo(); - - if (this->anchorParam.strides.empty() || - this->anchorParam.aspectRatios.empty()) { - LOGE("Invalid anchor parameters"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - int layerId = 0; - this->ClearAnchorBox(); - while (layerId < this->anchorParam.numLayers) { - std::vector anchorHeight; - std::vector anchorWidth; - std::vector aspectRatios; - std::vector scales; - - int lastSameStrideLayer = layerId; - std::vector::iterator iter1, iter2; - while ((lastSameStrideLayer < this->anchorParam.numLayers) && - (this->anchorParam.strides[lastSameStrideLayer] == - this->anchorParam.strides[layerId])) { - const float scale = CalculateScale(this->anchorParam.minScale, - this->anchorParam.maxScale, - lastSameStrideLayer, - this->anchorParam.strides.size()); - - if (lastSameStrideLayer == 0 && - this->anchorParam.isReduceBoxedInLowestLayer) { - aspectRatios.push_back(1.0); - aspectRatios.push_back(2.0); - aspectRatios.push_back(0.5); - scales.push_back(0.1); - scales.push_back(scale); - scales.push_back(scale); - } else { - for (iter1 = this->anchorParam.aspectRatios.begin(); - iter1 != this->anchorParam.aspectRatios.end(); - ++iter1) { - aspectRatios.push_back((*iter1)); - scales.push_back(scale); - } - if (this->anchorParam.interpolatedScaleAspectRatio > 0.0f) { - const float scaleNext = - lastSameStrideLayer == static_cast(this->anchorParam.strides.size()) -1 - ? 1.0f - : CalculateScale(this->anchorParam.minScale, - this->anchorParam.maxScale, - lastSameStrideLayer + 1, - this->anchorParam.strides.size()); - scales.push_back(std::sqrt(scale * scaleNext)); - aspectRatios.push_back(this->anchorParam.interpolatedScaleAspectRatio); - } - } - lastSameStrideLayer++; - } - - for (iter1 = aspectRatios.begin(), iter2 = scales.begin(); - (iter1 != aspectRatios.end() && iter2 != scales.end()); - ++iter1, ++iter2) { - const float ratioSqrts = std::sqrt((*iter1)); - anchorHeight.push_back((*iter2) / ratioSqrts); - anchorWidth.push_back((*iter2) * ratioSqrts); - } - - const int stride = this->anchorParam.strides[layerId]; - int featureMapHeight = std::ceil(1.0f * this->anchorParam.inputSizeHeight / stride); - int featureMapWidth = std::ceil(1.0f * this->anchorParam.inputSizeWidth / stride); - - for (int y = 0; y < featureMapHeight; ++y) { - for (int x = 0; x < featureMapWidth; ++x) { - for (int anchorId = 0; anchorId < (int)anchorHeight.size(); ++anchorId) { - cv::Rect2f anchor = { - cv::Point2f { - (x + this->anchorParam.anchorOffsetX) * 1.0f / featureMapWidth, - (y + this->anchorParam.anchorOffsetY) * 1.0f / featureMapHeight - }, - this->anchorParam.isFixedAnchorSize ? 
- cv::Size2f {1.0f, 1.0f} : - cv::Size2f {anchorWidth[anchorId], anchorWidth[anchorId]} - }; - this->AddAnchorBox(anchor); - } - } - } - layerId = lastSameStrideLayer; - } - - if (this->IsAnchorBoxEmpty()) { - LOGE("Anchor boxes are empty"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - return MEDIA_VISION_ERROR_NONE; - } - - - int BoxInfo::DecodeInfo::ParseNms(JsonObject *root) - { - if (json_object_has_member(root, "nms") == false) { - LOGI("nms is empty. skip it"); - return MEDIA_VISION_ERROR_NONE; - } - - JsonObject *object = json_object_get_object_member(root, "nms"); - this->nmsParam.mode = static_cast(json_object_get_int_member(object, "mode")); - this->nmsParam.threshold = static_cast(json_object_get_double_member(object,"threshold")); - - return MEDIA_VISION_ERROR_NONE; - } - - int BoxInfo::DecodeInfo::GetNmsMode() - { - return this->nmsParam.mode; - } - - float BoxInfo::DecodeInfo::GetNmsThreshold() - { - return this->nmsParam.threshold; - } - - ScoreInfo& OutputMetadata::GetScore() - { - return score; - } - - BoxInfo& OutputMetadata::GetBox() - { - return box; - } - - Label& OutputMetadata::GetLabel() - { - return label; - } - - Number& OutputMetadata::GetNumber() - { - return number; - } - - bool OutputMetadata::IsParsed() - { - return parsed; - } - - int OutputMetadata::Parse(JsonObject *root) - { - LOGI("ENTER"); - - int ret = ParseScore(root); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GetScore[%d]", ret); - return ret; - } - - ret = ParseBox(root); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GetBox[%d]", ret); - return ret; - } - - if (!box.GetName().empty()) { - // addtional parsing is required according to decoding type - if (box.GetDecodingType() == 0) { - - ret = ParseLabel(root); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GetLabel[%d]", ret); - return ret; - } - - ret = ParseNumber(root); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GetNumber[%d]", ret); - return ret; - } - - } else if (box.GetDecodingType() == 1) { - ret = ParseBoxDecodeInfo(root); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GetBoxDecodeInfo[%d]", ret); - return ret; - } - - ret = box.GetDecodeInfo().GenerateAnchor(); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to GenerateAnchor[%d]", ret); - return ret; - } - - } else { - LOGW("Unknow box decoding type. Ignore"); - } - } - - parsed = true; - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - void DimInfo::SetValidIndex(int index) - { - LOGI("ENTER"); - - dims.push_back(index); - - LOGI("LEAVE"); - } - - std::vector DimInfo::GetValidIndexAll() const - { - LOGI("ENTER"); - - LOGI("LEAVE"); - return dims; - } -} /* Inference */ -} /* MediaVision */ diff --git a/mv_inference/inference/src/PostProcess.cpp b/mv_inference/inference/src/PostProcess.cpp deleted file mode 100755 index 00059b45..00000000 --- a/mv_inference/inference/src/PostProcess.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "mv_private.h" - -#include -#include -#include -#include -#include "PostProcess.h" - -namespace mediavision -{ -namespace inference -{ - float PostProcess::sigmoid(float value) - { - return 1.0/(1.0+ exp(-value)); - } - - float PostProcess::dequant(float value, float scale, float zeropoint) - { - LOGI("ENTER"); - - LOGI("LEAVE"); - return value/scale + zeropoint; - } - - int PostProcess::ScoreClear(int size) - { - LOGI("ENTER"); - - std::priority_queue, - std::vector>, - std::greater>>().swap(mScore); - mMaxScoreSize = size; - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - int PostProcess::ScorePush(float value, int index) - { - LOGI("ENTER"); - - mScore.push(std::pair(value, index)); - if (mScore.size() > mMaxScoreSize) { - mScore.pop(); - } - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - int PostProcess::ScorePop(std::vector>& top) - { - LOGI("ENTER"); - - top.clear(); - while (!mScore.empty()) { - top.push_back(mScore.top()); - LOGI("%.3f", mScore.top().first); - mScore.pop(); - } - - std::reverse(top.begin(), top.end()); - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - static bool compareScore(Box box0, Box box1) - { - return box0.score > box1.score; - } - - static float calcIntersectionOverUnion(Box box0, Box box1) - { - - float area0 = box0.location.width * box0.location.height; - float area1 = box1.location.width * box1.location.height; - - if (area0 <= 0.0f || area1 <= 0.0f) - return 0.0f; - - float sx0 = box0.location.x - box0.location.width * 0.5f; - float sy0 = box0.location.y - box0.location.height * 0.5f; - float ex0 = box0.location.x + box0.location.width * 0.5f; - float ey0 = box0.location.y + box0.location.height * 0.5f; - float sx1 = box1.location.x - box1.location.width * 0.5f; - float sy1 = box1.location.y - box1.location.height * 0.5f; - float ex1 = box1.location.x + box1.location.width * 0.5f; - float ey1 = box1.location.y + box1.location.height * 0.5f; - - float xmin0 = std::min (sx0, ex0); - float ymin0 = std::min (sy0, ey0); - float xmax0 = std::max (sx0, ex0); - float ymax0 = std::max (sy0, ey0); - float xmin1 = std::min (sx1, ex1); - float ymin1 = std::min (sy1, ey1); - float xmax1 = std::max (sx1, ex1); - float ymax1 = std::max (sy1, ey1); - - float intersectXmin = std::max(xmin0, xmin1); - float intersectYmin = std::max(ymin0, ymin1); - float intersectXmax = std::min(xmax0, xmax1); - float intersectYmax = std::min(ymax0, ymax1); - - float intersectArea = std::max((intersectYmax - intersectYmin), 0.0f) * - std::max((intersectXmax - intersectXmin), 0.0f); - return intersectArea / (area0 + area1 - intersectArea); - } - - int PostProcess::Nms(BoxesList& boxeslist, int mode, float threshold, Boxes& nmsboxes) - { - LOGI("ENTER"); - - if (mode != 0) { - LOGI("Skip Nms"); - LOGI("LEAVE"); - return MEDIA_VISION_ERROR_NONE; - } - - LOGI("threshold: %.3f", threshold); - bool isIgnore = false; - Boxes candidateBoxes; - for (auto& boxList : boxeslist) { - if (boxList.size() <=0 ) - continue; - - std::sort(boxList.begin(), boxList.end(), compareScore); - candidateBoxes.clear(); - for (auto& decodedBox : boxList) { - isIgnore = false; - for (auto candidateBox = candidateBoxes.rbegin(); candidateBox != candidateBoxes.rend(); ++candidateBox) { - // compare decodedBox with previous one - float iouValue = calcIntersectionOverUnion(decodedBox, (*candidateBox)); - LOGI("iouValue: %.3f", iouValue); - if (iouValue >= 
threshold) { - isIgnore = true; - break; - } - } - if (!isIgnore) { - candidateBoxes.push_back(decodedBox); - } - } - if (candidateBoxes.size() > 0) { - nmsboxes.insert(nmsboxes.begin(), candidateBoxes.begin(), candidateBoxes.end()); - } - } - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } -} /* Inference */ -} /* MediaVision */ diff --git a/mv_inference/inference/src/Posture.cpp b/mv_inference/inference/src/Posture.cpp deleted file mode 100644 index 14c0cec6..00000000 --- a/mv_inference/inference/src/Posture.cpp +++ /dev/null @@ -1,362 +0,0 @@ -/** - * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "mv_private.h" -#include "Posture.h" - -#include -#include -#include -#include -#include -#include - -#define POSE_OFFSET_VALUE 20 - -namespace mediavision -{ -namespace inference -{ -Posture::Posture() : - mBvhParser(), - mBvh(), - mPose() -{ - LOGI("ENTER"); - - mMotionToPoseMap.clear(); - mPose.assign(HUMAN_POSE_MAX_LANDMARKS, std::make_pair(false, cv::Point(-1,-1))); - - mPoseParts.assign(HUMAN_POSE_MAX_PARTS, std::make_pair(false, std::vector())); - - LOGI("LEAVE"); -} - -Posture::~Posture() -{ - LOGI("ENTER"); - - std::vector>().swap(mPose); - - LOGI("LEAVE"); -} - - -int Posture::getParts(int parts, - std::vector>& pose, - std::vector>>& posePart) -{ - LOGI("ENTER"); - // head - if (parts & MV_INFERENCE_HUMAN_BODY_PART_HEAD) { - LOGI("HEAD"); - if (pose[0].first == false || pose[1].first == false || pose[2].first == false) { - posePart[0].first = false; - } else { - posePart[0].first = true; - posePart[0].second.push_back(getUnitVectors(pose[0].second, pose[1].second)); - posePart[0].second.push_back(getUnitVectors(pose[1].second, pose[2].second)); - } - } - - // right arm - if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_RIGHT) { - LOGI("ARM-R"); - if (pose[3].first == false || pose[4].first == false || pose[5].first == false) { - posePart[1].first = false; - } else { - posePart[1].first = true; - posePart[1].second.push_back(getUnitVectors(pose[3].second, pose[4].second)); - posePart[1].second.push_back(getUnitVectors(pose[4].second, pose[5].second)); - } - } - - // left arm - if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_LEFT) { - LOGI("ARM-L"); - if (pose[6].first == false || pose[7].first == false || pose[8].first == false) { - posePart[2].first = false; - } else { - posePart[2].first = true; - posePart[2].second.push_back(getUnitVectors(pose[6].second, pose[7].second)); - posePart[2].second.push_back(getUnitVectors(pose[7].second, pose[8].second)); - } - } - - // right leg - if (parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT) { - LOGI("LEG-R"); - if (pose[10].first == false || pose[11].first == false || pose[12].first == false) { - posePart[3].first = false; - } else { - posePart[3].first = true; - posePart[3].second.push_back(getUnitVectors(pose[10].second, pose[11].second)); - posePart[3].second.push_back(getUnitVectors(pose[11].second, pose[12].second)); - } - } - - // left leg - if 
(parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT) { - LOGI("LEG-L"); - if (pose[13].first == false || pose[14].first == false || pose[15].first == false) { - posePart[4].first = false; - } else { - posePart[4].first = true; - posePart[4].second.push_back(getUnitVectors(pose[13].second, pose[14].second)); - posePart[4].second.push_back(getUnitVectors(pose[14].second, pose[15].second)); - - } - } - - // body - if (parts & MV_INFERENCE_HUMAN_BODY_PART_BODY) { - LOGI("BODY"); - if (pose[2].first == false || pose[9].first == false || - pose[10].first == false || pose[13].first == false) { - posePart[5].first = false; - } else { - posePart[5].first = true; - posePart[5].second.push_back(getUnitVectors(pose[2].second, pose[9].second)); - posePart[5].second.push_back(getUnitVectors(pose[9].second, pose[10].second)); - posePart[5].second.push_back(getUnitVectors(pose[9].second, pose[13].second)); - } - } - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; -} - -int Posture::setPoseFromFile(const std::string motionCaptureFilePath, const std::string motionMappingFilePath) -{ - LOGI("ENTER"); - - int ret = MEDIA_VISION_ERROR_NONE; - - // parsing motion capture file - LOGD("%s", motionCaptureFilePath.c_str()); - LOGD("%s", motionMappingFilePath.c_str()); - ret = mBvhParser.parse(motionCaptureFilePath.c_str(), &mBvh); - LOGD("frames: %d",mBvh.num_frames()); - - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to parse a file [%s]", motionCaptureFilePath.c_str()); - return MEDIA_VISION_ERROR_INTERNAL; - } - - mBvh.recalculate_joints_ltm(); - - LOGD("reading motion mapping...."); - // read motion mapping file - std::ifstream fp(motionMappingFilePath.c_str()); - if (!fp.is_open()) { - LOGE("Fail to open %s", motionMappingFilePath.c_str()); - return MEDIA_VISION_ERROR_INVALID_PATH; - } - - std::string line; - mMotionToPoseMap.clear(); - while (!fp.eof()) { - std::getline(fp, line); - - if (line.empty()) - continue; - - LOGD("%s", line.c_str()); - std::istringstream lineStream(line); - std::string token; - std::vector parsedString; - while(getline(lineStream, token, ',')) { - parsedString.push_back(token); - } - LOGD("name: %s, mapping index: %d", parsedString[0].c_str(), std::stoi(parsedString[1])); - mMotionToPoseMap.insert(make_pair(parsedString[0], std::stoi(parsedString[1])-1)); - } - - fp.close(); - LOGD("mapping size is %zd", mMotionToPoseMap.size()); - - // convert bvh to pose - //convertBvhToPose(); - //for (std::shared_ptr) - - float pointX, pointY, minX, minY, maxX, maxY; - minX = minY = FLT_MAX; - maxX = maxY = FLT_MIN; - for (std::shared_ptr joint : mBvh.joints()) { - std::map::iterator it = mMotionToPoseMap.find(std::string(joint->name())); - if (it != mMotionToPoseMap.end()) { - pointX = joint->pos(0)[0]; - pointY = joint->pos(0)[1]; - if (pointX < minX) - minX = pointX; - - if (pointY < minY) - minY = pointY; - - if (pointX > maxX) - maxX = pointX; - - if (pointY > maxY) - maxY = pointY; - - mPose[it->second].first = true; - mPose[it->second].second = cv::Point(pointX, pointY); - LOGD("%d: (%f,%f)", it->second, pointX, pointY); - } - } - - // add offset to make x > 0 and y > 0 - int height = (int)maxY - (int)minY + POSE_OFFSET_VALUE; - for (std::vector>::iterator iter = mPose.begin(); - iter != mPose.end(); iter++) { - if (iter->first == false) - continue; - - iter->second.x -= minX; - iter->second.y -= minY; - - iter->second.x += POSE_OFFSET_VALUE; - iter->second.y += POSE_OFFSET_VALUE; - - iter->second.y = height - iter->second.y; - - LOGI("(%d, %d)", iter->second.x, iter->second.y); - } 
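The loop above shifts every mapped BVH joint so both coordinates stay positive (subtracting the minimum and adding a POSE_OFFSET_VALUE margin) and then mirrors the y axis, because BVH coordinates grow upward while image coordinates grow downward. A minimal standalone sketch of that transform, with an illustrative function name and signature that are not part of this diff:

#include <opencv2/core.hpp>
#include <vector>

// Shift points by (minX, minY), add a margin, then flip y into image space.
// 'offset' plays the role of POSE_OFFSET_VALUE in the code above.
std::vector<cv::Point> normalizePose(const std::vector<cv::Point>& joints,
                                     float minX, float minY, float maxY,
                                     int offset)
{
	std::vector<cv::Point> out;
	const int height = static_cast<int>(maxY) - static_cast<int>(minY) + offset;

	for (const auto& p : joints) {
		cv::Point q(p.x - static_cast<int>(minX) + offset,
		            p.y - static_cast<int>(minY) + offset);
		q.y = height - q.y; // BVH y grows upward; image y grows downward
		out.push_back(q);
	}

	return out;
}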
- - ret = getParts((MV_INFERENCE_HUMAN_BODY_PART_HEAD | - MV_INFERENCE_HUMAN_BODY_PART_ARM_RIGHT | - MV_INFERENCE_HUMAN_BODY_PART_ARM_LEFT | - MV_INFERENCE_HUMAN_BODY_PART_BODY | - MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT | - MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT), - mPose, mPoseParts); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to getPartse"); - return ret; - } - - LOGI("LEAVE"); - - return ret; -} - -cv::Vec2f Posture::getUnitVectors(cv::Point point1, cv::Point point2) -{ - LOGI("ENTER"); - - cv::Vec2i vec(point1.x - point2.x, point1.y - point2.y); - cv::Vec2f unitVec (vec[0]/cv::norm(vec, cv::NORM_L1), vec[1]/cv::norm(vec, cv::NORM_L1)); - - LOGI("LEAVE"); - - return unitVec; -} - -float Posture::cosineSimilarity(std::vector vec1, std::vector vec2, int size) -{ - float numer = 0.0f; - float denom1 = 0.0f; - float denom2 = 0.0f; - - float value = 0.0f; - - for (int k = 0; k < size; ++k) { - numer = denom1 = denom2 = 0.0f; - for (int dim = 0; dim <2; ++dim) { - numer += (vec1[k][dim] * vec2[k][dim]); - denom1 += (vec1[k][dim] * vec1[k][dim]); - denom2 += (vec2[k][dim] * vec2[k][dim]); - } - LOGI("similarity: %f", numer / sqrt( denom1 * denom2)); - value += numer / sqrt( denom1 * denom2); - - } - - return value; -} - -float Posture::getSimilarity(int parts, - std::vector>>& posePart, - std::vector>>& actionPart) -{ - float score = 0.0f; - unsigned int bodyCount = 0; - std::vector index; - - if (parts & MV_INFERENCE_HUMAN_BODY_PART_HEAD) { - index.push_back(0); - } - - if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_RIGHT) { - index.push_back(1); - } - - if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_LEFT) { - index.push_back(2); - } - - if (parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT) { - index.push_back(3); - } - - if (parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT) { - index.push_back(4); - } - - if (parts & MV_INFERENCE_HUMAN_BODY_PART_BODY) { - index.push_back(5); - } - - for (std::vector::iterator it = index.begin(); it != index.end(); ++it) { - if (posePart[(*it)].first && actionPart[(*it)].first && - (posePart[(*it)].second.size() == actionPart[(*it)].second.size())) { - score += cosineSimilarity(posePart[(*it)].second, actionPart[(*it)].second, posePart[(*it)].second.size()); - - bodyCount += posePart[(*it)].second.size(); - LOGI("body[%d], score[%f], count[%u]", (*it), score, bodyCount); - } - } - - if (bodyCount > 0) - score /= (float)bodyCount; - - LOGD("score: %1.3f", score); - - return score; -} - -int Posture::compare(int parts, std::vector> action, float* score) -{ - LOGI("ENTER"); - - std::vector>> actionParts; - actionParts.assign(6, std::make_pair(false, std::vector())); - int ret = getParts(parts, action, actionParts); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to getPartse"); - return ret; - } - - *score = getSimilarity(parts, mPoseParts, actionParts); - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; -} - -} -} // namespace diff --git a/mv_inference/inference/src/PreProcess.cpp b/mv_inference/inference/src/PreProcess.cpp deleted file mode 100644 index fa65ced2..00000000 --- a/mv_inference/inference/src/PreProcess.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "mv_private.h" - -#include -#include -#include -#include -#include -#include "PreProcess.h" - -const int colorConvertTable[][12] = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 0, -1, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_GRAY2BGR565, cv::COLOR_GRAY2RGB, cv::COLOR_GRAY2RGBA }, - { 0, cv::COLOR_YUV2GRAY_I420, -1, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_RGBA2GRAY, cv::COLOR_YUV2RGBA_I420 }, - { 0, cv::COLOR_YUV2GRAY_NV12, 0, -1, 0, 0, 0, 0, 0, 0, cv::COLOR_YUV2RGB_NV12, cv::COLOR_YUV2RGBA_NV12 }, - { 0, cv::COLOR_YUV2GRAY_YV12, 0, 0, -1, 0, 0, 0, 0, 0, cv::COLOR_YUV2RGB_YV12, cv::COLOR_YUV2RGBA_YV12 }, - { 0, cv::COLOR_YUV2GRAY_NV21, 0, 0, 0, -1, 0, 0, 0, 0, cv::COLOR_YUV2RGB_NV21, cv::COLOR_YUV2RGBA_NV21 }, - { 0, cv::COLOR_YUV2GRAY_YUYV, 0, 0, 0, 0, -1, 0, 0, 0, cv::COLOR_YUV2RGB_YUYV, cv::COLOR_YUV2RGBA_YUYV }, - { 0, cv::COLOR_YUV2GRAY_UYVY, 0, 0, 0, 0, 0, -1, 0, 0, cv::COLOR_YUV2BGR_UYVY, cv::COLOR_YUV2BGRA_UYVY }, - { 0, cv::COLOR_YUV2GRAY_Y422, 0, 0, 0, 0, 0, 0, -1, 0, cv::COLOR_YUV2RGB_Y422, cv::COLOR_YUV2RGBA_Y422 }, - { 0, cv::COLOR_BGR5652GRAY, 0, 0, 0, 0, 0, 0, 0, -1, cv::COLOR_BGR5652BGR, cv::COLOR_BGR5652BGRA }, - { 0, cv::COLOR_RGB2GRAY, 0, 0, 0, 0, 0, 0, 0, 0, -1, cv::COLOR_RGB2RGBA }, - { 0, cv::COLOR_RGBA2GRAY, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_BGRA2BGR565, cv::COLOR_RGBA2RGB, -1} -}; - -namespace mediavision -{ -namespace inference -{ - int PreProcess::Resize(cv::Mat& source, cv::Mat& dest, cv::Size size) - { - LOGI("ENTER"); - - try { - cv::resize(source, dest, size); - } catch (cv::Exception& e) { - LOGE("Fail to resize with msg: %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - - int PreProcess::ColorConvert(cv::Mat& source, cv::Mat& dest, int sType, int dType) - { - LOGI("ENTER"); - - auto conversionColor = static_cast(colorConvertTable[sType][dType]); - if (conversionColor == -1) {/* Don't need conversion */ - dest = source; - } else if (conversionColor > 0) { - /* Class for representation the given image as cv::Mat before conversion */ - cv::cvtColor(source, dest, conversionColor); - } else { - LOGE("Fail to ColorConvert"); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - int PreProcess::Normalize(cv::Mat& source, cv::Mat& dest, - const std::vector& mean, const std::vector& std) - { - LOGI("ENTER"); - try { - cv::subtract(source, cv::Scalar(mean[0], mean[1], mean[2]), dest); - source = dest; - cv::divide(source, cv::Scalar(std[0], std[1], std[2]), dest); - } catch (cv::Exception& e) { - LOGE("Fail to substract/divide with msg: %s", e.what()); - return MEDIA_VISION_ERROR_INVALID_OPERATION; - } - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - - int PreProcess::Run(cv::Mat& source, const int colorSpace, - const int dataType, const LayerInfo& layerInfo, - const Options& options, void* buffer) - { - LOGI("ENTER"); - - // dest is a wrapper of the buffer - cv::Mat dest(cv::Size(layerInfo.GetWidth(), layerInfo.GetHeight()), - dataType, buffer); - - cv::Mat cvSource, cvDest; - // cvSource has new 
allocation with dest.size() - Resize(source, cvSource, dest.size()); - - // cvDest has new allocation if it's colorSpace is not RGB888 - // cvDest share the data with cvSource it's colorSpace is RGB888 - ColorConvert(cvSource, cvDest, colorSpace, layerInfo.colorSpace); - - cvDest.convertTo(dest, dest.type()); - - if (options.normalization.use) { - Normalize(dest, dest, options.normalization.mean, options.normalization.std); - } - - LOGI("LEAVE"); - - return MEDIA_VISION_ERROR_NONE; - } - -} /* Inference */ -} /* MediaVision */ diff --git a/mv_inference/inference/src/TensorBuffer.cpp b/mv_inference/inference/src/TensorBuffer.cpp deleted file mode 100644 index 6e4fc30c..00000000 --- a/mv_inference/inference/src/TensorBuffer.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/** - * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "mv_private.h" -#include "TensorBuffer.h" - -#include - -#include -#include -#include -#include - -namespace mediavision -{ -namespace inference -{ - - bool TensorBuffer::empty() - { - return mTensorBuffer.empty(); - } - - bool TensorBuffer::exist(std::string name) - { - return getTensorBuffer(name) != nullptr; - } - - void TensorBuffer::clear() - { - for (auto& tensorBuffer : mTensorBuffer) { - auto& tBuffer = tensorBuffer.second; - if (tBuffer.owner_is_backend) { - continue; - } - - if (tBuffer.data_type == INFERENCE_TENSOR_DATA_TYPE_FLOAT32) { - delete[] static_cast(tBuffer.buffer); - } else if (tBuffer.data_type == INFERENCE_TENSOR_DATA_TYPE_INT64) { - delete[] static_cast(tBuffer.buffer); - } else if (tBuffer.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT32) { - delete[] static_cast(tBuffer.buffer); - } else if (tBuffer.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT16) { - delete[] static_cast(tBuffer.buffer); - } else { - delete[] static_cast(tBuffer.buffer); - } - } - - LOGI("Tensor(%zu) have been released.", mTensorBuffer.size()); - IETensorBuffer().swap(mTensorBuffer); - } - - size_t TensorBuffer::size() - { - return mTensorBuffer.size(); - } - - IETensorBuffer& TensorBuffer::getAllTensorBuffer() - { - return mTensorBuffer; - } - - inference_engine_tensor_buffer* TensorBuffer::getTensorBuffer(std::string name) - { - if (mTensorBuffer.find(name) == mTensorBuffer.end()){ - return nullptr; - } - - return &mTensorBuffer[name]; - } - - bool TensorBuffer::setTensorBuffer(std::string name, inference_engine_tensor_buffer& buffer) - { - if (name.empty() || - buffer.buffer == nullptr) { - LOGE("Invalid parameters: %s, %p", name.c_str(), buffer.buffer); - return false; - } - - auto ret = mTensorBuffer.insert(std::make_pair(name, buffer)); - if (ret.second == false) { - LOGE("Fail to insert %s with buffer %p", name.c_str(), buffer.buffer); - return false; - } - - return true; - } - - template - T TensorBuffer::getValue(std::string name, int idx) - { - inference_engine_tensor_buffer* tBuffer = - getTensorBuffer(name); - if (tBuffer == nullptr) { - throw std::invalid_argument(name); - } - - switch 
(tBuffer->data_type) {
-		case INFERENCE_TENSOR_DATA_TYPE_FLOAT32:
-			return static_cast<T>(static_cast<float *>(tBuffer->buffer)[idx]);
-		case INFERENCE_TENSOR_DATA_TYPE_INT64:
-			return static_cast<T>(
-					static_cast<long long *>(tBuffer->buffer)[idx]);
-		case INFERENCE_TENSOR_DATA_TYPE_UINT32:
-			return static_cast<T>(
-					static_cast<unsigned int *>(tBuffer->buffer)[idx]);
-		case INFERENCE_TENSOR_DATA_TYPE_UINT8:
-			return static_cast<T>(
-					static_cast<unsigned char *>(tBuffer->buffer)[idx]);
-		case INFERENCE_TENSOR_DATA_TYPE_UINT16:
-			return static_cast<T>(
-					static_cast<unsigned short *>(tBuffer->buffer)[idx]);
-		default:
-			break;
-		}
-
-		throw std::invalid_argument("Invalid data type");
-	}
-
-	template float TensorBuffer::getValue<float>(std::string, int);
-	template int TensorBuffer::getValue<int>(std::string, int);
-} /* Inference */
-} /* MediaVision */
diff --git a/mv_inference/inference/src/mv_inference.c b/mv_inference/inference/src/mv_inference.c
deleted file mode 100644
index 454354e8..00000000
--- a/mv_inference/inference/src/mv_inference.c
+++ /dev/null
@@ -1,499 +0,0 @@
-/**
- * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "mv_private.h"
-#include "mv_inference.h"
-
-#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT
-
-/* Include headers of licensed inference module here. */
-//#include "mv_inference_lic.h"
-
-#else
-
-/* Include headers of open inference module here. */
-#include "mv_inference_open.h"
-
-#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */
-
-/**
- * @file mv_inference.c
- * @brief This file contains the Media Vision inference module.
- */ - -int mv_inference_create(mv_inference_h *infer) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_NULL_ARG_CHECK(infer); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - -#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT - - //ret = mv_inference_create_lic(infer); - -#else - - ret = mv_inference_create_open(infer); - -#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ - - MEDIA_VISION_FUNCTION_LEAVE(); - return ret; -} - -int mv_inference_destroy(mv_inference_h infer) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(infer); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - -#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT - - //ret = mv_inference_destroy_lic(infer); - -#else - - ret = mv_inference_destroy_open(infer); - -#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ - - MEDIA_VISION_FUNCTION_LEAVE(); - return ret; -} - -int mv_inference_configure(mv_inference_h infer, - mv_engine_config_h engine_config) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(infer); - MEDIA_VISION_INSTANCE_CHECK(engine_config); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - -#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT - - //ret = mv_inference_configure_lic(infer); - -#else - - ret = mv_inference_configure_engine_open(infer, engine_config); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to configure engine and target"); - return ret; - } - -#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ - - MEDIA_VISION_FUNCTION_LEAVE(); - return ret; -} - -int mv_inference_prepare(mv_inference_h infer) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(infer); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h engine_config = mv_inference_get_engine_config(infer); - -#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT - - //ret = mv_inference_prepare_lic(infer); - -#else - - ret = mv_inference_configure_model_open(infer, engine_config); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to configure model"); - return ret; - } - - // input tensor, input layer - ret = mv_inference_configure_input_info_open(infer, engine_config); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to configure input info"); - return ret; - } - - // output layer - ret = mv_inference_configure_output_info_open(infer, engine_config); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to configure output info"); - return ret; - } - - // maximum candidates, threshold - ret = mv_inference_configure_post_process_info_open(infer, engine_config); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to configure post process info"); - return ret; - } - - ret = mv_inference_prepare_open(infer); - -#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ - - MEDIA_VISION_FUNCTION_LEAVE(); - return ret; -} - -int mv_inference_foreach_supported_engine( - mv_inference_h infer, mv_inference_supported_engine_cb callback, - void *user_data) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(infer); - MEDIA_VISION_NULL_ARG_CHECK(callback); - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - -#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT - - // ret = mv_inference_foreach_supported_engine_lic(infer, 
callback, user_data); - -#else - - ret = mv_inference_foreach_supported_engine_open(infer, callback, - user_data); - -#endif - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} - -int mv_inference_image_classify(mv_source_h source, mv_inference_h infer, - mv_rectangle_s *roi, - mv_inference_image_classified_cb classified_cb, - void *user_data) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_image_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(source); - MEDIA_VISION_INSTANCE_CHECK(infer); - MEDIA_VISION_NULL_ARG_CHECK(classified_cb); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - -#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT - - /* - ret = mv_inference_image_classify_lic(source, infer, classified_cb, user_data); - */ - -#else - - ret = mv_inference_image_classify_open(source, infer, roi, classified_cb, - user_data); - -#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} - -int mv_inference_object_detect(mv_source_h source, mv_inference_h infer, - mv_inference_object_detected_cb detected_cb, - void *user_data) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_image_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(source); - MEDIA_VISION_INSTANCE_CHECK(infer); - MEDIA_VISION_NULL_ARG_CHECK(detected_cb); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - -#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT - - /* - ret = mv_inference_object_detect_lic(source, infer, classified_cb, user_data); - */ - -#else - - ret = mv_inference_object_detect_open(source, infer, detected_cb, - user_data); - -#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} - -int mv_inference_face_detect(mv_source_h source, mv_inference_h infer, - mv_inference_face_detected_cb detected_cb, - void *user_data) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_face_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(source); - MEDIA_VISION_INSTANCE_CHECK(infer); - MEDIA_VISION_NULL_ARG_CHECK(detected_cb); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - -#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT - /* - ret = mv_inference_face_detect_lic(source, infer, detected_cb, user_data); - */ -#else - - ret = mv_inference_face_detect_open(source, infer, detected_cb, user_data); - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; - -#endif -} - -int mv_inference_facial_landmark_detect( - mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, - mv_inference_facial_landmark_detected_cb detected_cb, void *user_data) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_face_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(source); - MEDIA_VISION_INSTANCE_CHECK(infer); - MEDIA_VISION_NULL_ARG_CHECK(detected_cb); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - -#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT - /* - ret = mv_inference_facial_landmark_detect_lic(source, infer, detected_cb, user_data); - */ -#else - - ret = mv_inference_facial_landmark_detect_open(source, infer, roi, - detected_cb, user_data); - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; - -#endif -} - -int mv_inference_pose_landmark_detect( - mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, - mv_inference_pose_landmark_detected_cb detected_cb, void *user_data) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_face_check_system_info_feature_supported()); - 
MEDIA_VISION_INSTANCE_CHECK(source); - MEDIA_VISION_INSTANCE_CHECK(infer); - MEDIA_VISION_NULL_ARG_CHECK(detected_cb); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - - - ret = mv_inference_pose_landmark_detect_open(source, infer, roi, - detected_cb, user_data); - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} - -int mv_inference_pose_get_number_of_poses(mv_inference_pose_result_h result, int *number_of_poses) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(result); - - MEDIA_VISION_NULL_ARG_CHECK(number_of_poses); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - - ret = mv_inference_pose_get_number_of_poses_open(result, number_of_poses); - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} - - -int mv_inference_pose_get_number_of_landmarks(mv_inference_pose_result_h result, int *number_of_landmarks) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(result); - - MEDIA_VISION_NULL_ARG_CHECK(number_of_landmarks); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - - ret = mv_inference_pose_get_number_of_landmarks_open(result, number_of_landmarks); - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} - -int mv_inference_pose_get_landmark(mv_inference_pose_result_h result, - int pose_index, int part_index, mv_point_s *location, float *score) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(result); - - MEDIA_VISION_NULL_ARG_CHECK(location); - MEDIA_VISION_NULL_ARG_CHECK(score); - - if (pose_index < 0 || part_index < 0) - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - - ret = mv_inference_pose_get_landmark_open(result, pose_index, part_index, location, score); - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} - -int mv_inference_pose_get_label(mv_inference_pose_result_h result, int pose_index, int *label) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(result); - - MEDIA_VISION_NULL_ARG_CHECK(label); - - if (pose_index < 0) - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - - ret = mv_inference_pose_get_label_open(result, pose_index, label); - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} - - -int mv_pose_create(mv_pose_h *pose) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_NULL_ARG_CHECK(pose); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - - ret = mv_pose_create_open(pose); - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} - -int mv_pose_destroy(mv_pose_h pose) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(pose); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - - ret = mv_pose_destroy_open(pose); - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} - -int mv_pose_set_from_file(mv_pose_h pose, const char *motion_capture_file_path, const char *motion_mapping_file_path) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(pose); - MEDIA_VISION_NULL_ARG_CHECK(motion_capture_file_path); - 
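The mv_pose_* entry points above are meant to be used as create, then set_from_file, then compare, then destroy. A minimal hypothetical caller, assuming placeholder file paths and an arbitrary body-part mask; only the functions and flags visible in this diff are used:

#include <mv_inference.h>

// Compare a detected pose result against a reference motion-capture pose.
int compare_with_reference(mv_inference_pose_result_h detected, float *score)
{
	mv_pose_h pose = NULL;
	int ret = mv_pose_create(&pose);
	if (ret != MEDIA_VISION_ERROR_NONE)
		return ret;

	// A .bvh capture plus a mapping of joint names to landmark indices.
	ret = mv_pose_set_from_file(pose, "/path/to/reference.bvh",
	                            "/path/to/joint_mapping.txt");
	if (ret == MEDIA_VISION_ERROR_NONE)
		ret = mv_pose_compare(pose, detected,
		                      MV_INFERENCE_HUMAN_BODY_PART_BODY |
		                      MV_INFERENCE_HUMAN_BODY_PART_ARM_RIGHT |
		                      MV_INFERENCE_HUMAN_BODY_PART_ARM_LEFT,
		                      score);

	mv_pose_destroy(pose);
	return ret;
}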
MEDIA_VISION_NULL_ARG_CHECK(motion_mapping_file_path); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - - ret = mv_pose_set_from_file_open(pose, motion_capture_file_path, motion_mapping_file_path); - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} - -int mv_pose_compare(mv_pose_h pose, mv_inference_pose_result_h action, int parts, float *score) -{ - MEDIA_VISION_SUPPORT_CHECK( - __mv_inference_check_system_info_feature_supported()); - MEDIA_VISION_INSTANCE_CHECK(pose); - MEDIA_VISION_INSTANCE_CHECK(action); - MEDIA_VISION_NULL_ARG_CHECK(score); - - MEDIA_VISION_FUNCTION_ENTER(); - - int ret = MEDIA_VISION_ERROR_NONE; - - ret = mv_pose_compare_open(pose, action, parts, score); - - MEDIA_VISION_FUNCTION_LEAVE(); - - return ret; -} diff --git a/mv_inference/inference/src/mv_inference_open.cpp b/mv_inference/inference/src/mv_inference_open.cpp deleted file mode 100644 index 1c4eb7ed..00000000 --- a/mv_inference/inference/src/mv_inference_open.cpp +++ /dev/null @@ -1,1020 +0,0 @@ -/** - * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "mv_private.h" -#include "mv_inference_open.h" - -#include "Inference.h" -#include "Posture.h" - -#include -#include -#include - -using namespace mediavision::inference; - -static int check_mv_inference_engine_version(mv_engine_config_h engine_config, - bool *is_new_version) -{ - int oldType = 0, newType = 0; - - int ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_TARGET_TYPE, &oldType); - if (ret != MEDIA_VISION_ERROR_NONE) - oldType = -1; - - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_TARGET_DEVICE_TYPE, &newType); - if (ret != MEDIA_VISION_ERROR_NONE) - newType = -1; - - // At least one of two target device types of - // media-vision-config.json file should have CPU device. - if (oldType == -1 && newType == -1) - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - - // If values of both types are changed then return an error. - // only one of two types should be used. - if (oldType != MV_INFERENCE_TARGET_CPU && - newType != MV_INFERENCE_TARGET_DEVICE_CPU) { - LOGE("Please use only one of below two device types."); - LOGE("MV_INFERENCE_TARGET_TYPE(deprecated) or MV_INFERENCE_TARGET_DEVICE_TYPE(recommended)."); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - LOGI("oldType = %d, newType = %d", oldType, newType); - - // If default value of only old type is changed then use old type. - // Otherwise, use new type in following cases, - // - all default values of two types aren't changed. - // (oldType == MV_INFERENCE_TARGET_CPU && newType == MV_INFERENCE_TARGET_DEVICE_CPU) - // - default value of only new type is changed. 
- // (oldType == MV_INFERENCE_TARGET_CPU && (newType != -1 && newType != MV_INFERENCE_TARGET_DEVICE_CPU)) - if ((oldType != -1 && oldType != MV_INFERENCE_TARGET_CPU) && - newType == MV_INFERENCE_TARGET_DEVICE_CPU) - *is_new_version = false; - else - *is_new_version = true; - - return MEDIA_VISION_ERROR_NONE; -} - -mv_engine_config_h mv_inference_get_engine_config(mv_inference_h infer) -{ - Inference *pInfer = static_cast(infer); - return pInfer->GetEngineConfig(); -} - -int mv_inference_create_open(mv_inference_h *infer) -{ - if (infer == NULL) { - LOGE("Handle can't be created because handle pointer is NULL"); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - (*infer) = static_cast(new (std::nothrow) Inference()); - - if (*infer == NULL) { - LOGE("Failed to create inference handle"); - return MEDIA_VISION_ERROR_OUT_OF_MEMORY; - } - - LOGD("Inference handle [%p] has been created", *infer); - - return MEDIA_VISION_ERROR_NONE; -} - -int mv_inference_destroy_open(mv_inference_h infer) -{ - if (!infer) { - LOGE("Handle can't be destroyed because handle is NULL"); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - LOGD("Destroying inference handle [%p]", infer); - delete static_cast(infer); - LOGD("Inference handle has been destroyed"); - - return MEDIA_VISION_ERROR_NONE; -} - -int mv_inference_configure_model_open(mv_inference_h infer, - mv_engine_config_h engine_config) -{ - LOGI("ENTER"); - - Inference *pInfer = static_cast(infer); - - int ret = MEDIA_VISION_ERROR_NONE; - - char *modelConfigFilePath = NULL; - char *modelWeightFilePath = NULL; - char *modelUserFilePath = NULL; - double modelMeanValue = 0.0; - int backendType = 0; - size_t userFileLength = 0; - - // TODO: a temporal variable, later, it should be removed. - std::string metaFilePath; - - ret = mv_engine_config_get_string_attribute( - engine_config, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, - &modelConfigFilePath); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get model configuration file path"); - goto _ERROR_; - } - - ret = mv_engine_config_get_string_attribute( - engine_config, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - &modelWeightFilePath); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get model weight file path"); - goto _ERROR_; - } - - ret = mv_engine_config_get_string_attribute( - engine_config, MV_INFERENCE_MODEL_USER_FILE_PATH, - &modelUserFilePath); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get model user file path"); - goto _ERROR_; - } - - ret = mv_engine_config_get_double_attribute( - engine_config, MV_INFERENCE_MODEL_MEAN_VALUE, &modelMeanValue); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get model mean value"); - goto _ERROR_; - } - - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_BACKEND_TYPE, &backendType); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get inference backend type"); - goto _ERROR_; - } - - if (access(modelWeightFilePath, F_OK)) { - LOGE("weightFilePath in [%s] ", modelWeightFilePath); - ret = MEDIA_VISION_ERROR_INVALID_PATH; - goto _ERROR_; - } - - if ((backendType > MV_INFERENCE_BACKEND_NONE && - backendType < MV_INFERENCE_BACKEND_MAX) && - (backendType != MV_INFERENCE_BACKEND_TFLITE) && - (backendType != MV_INFERENCE_BACKEND_ARMNN) && - (backendType == MV_INFERENCE_BACKEND_MLAPI && - (pInfer->GetTargetType() & MV_INFERENCE_TARGET_DEVICE_CUSTOM)) && - (backendType != MV_INFERENCE_BACKEND_ONE)) { - if (access(modelConfigFilePath, F_OK)) { - LOGE("modelConfigFilePath in [%s] ", modelConfigFilePath); - ret = 
MEDIA_VISION_ERROR_INVALID_PATH; - goto _ERROR_; - } - } - - userFileLength = strlen(modelUserFilePath); - if (userFileLength > 0 && access(modelUserFilePath, F_OK)) { - LOGE("categoryFilePath in [%s] ", modelUserFilePath); - ret = MEDIA_VISION_ERROR_INVALID_PATH; - goto _ERROR_; - } - - pInfer->ConfigureModelFiles(std::string(modelConfigFilePath), - std::string(modelWeightFilePath), - std::string(modelUserFilePath)); - /* FIXME - * temporal code lines to get a metafile, which has the same name - * with modelsWeightFilePath except the extension. - * Later, it should get a metafilename and the below lines should be - * removed. - */ - metaFilePath = std::string(modelWeightFilePath).substr(0, - std::string(modelWeightFilePath).find_last_of('.')) + ".json"; - LOGI("metaFilePath: %s", metaFilePath.c_str()); - pInfer->ParseMetadata(metaFilePath); - -_ERROR_: - if (modelConfigFilePath) - free(modelConfigFilePath); - - if (modelWeightFilePath) - free(modelWeightFilePath); - - if (modelUserFilePath) - free(modelUserFilePath); - - LOGI("LEAVE"); - - return ret; -} - -int mv_inference_configure_tensor_info_open(mv_inference_h infer, - mv_engine_config_h engine_config) -{ - LOGI("ENTER"); - - Inference *pInfer = static_cast(infer); - - int ret = MEDIA_VISION_ERROR_NONE; - - int tensorWidth, tensorHeight, tensorDim, tensorCh; - double meanValue, stdValue; - - // This should be one. only one batch is supported - tensorDim = 1; - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_INPUT_TENSOR_WIDTH, &tensorWidth); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get tensor width"); - goto _ERROR_; - } - - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_INPUT_TENSOR_HEIGHT, &tensorHeight); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get tensor height"); - goto _ERROR_; - } - - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_INPUT_TENSOR_CHANNELS, &tensorCh); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get tensor channels"); - goto _ERROR_; - } - - ret = mv_engine_config_get_double_attribute( - engine_config, MV_INFERENCE_MODEL_MEAN_VALUE, &meanValue); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get meanValue"); - goto _ERROR_; - } - - ret = mv_engine_config_get_double_attribute( - engine_config, MV_INFERENCE_MODEL_STD_VALUE, &stdValue); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get stdValue"); - goto _ERROR_; - } - - pInfer->ConfigureTensorInfo(tensorWidth, tensorHeight, tensorDim, tensorCh, - stdValue, meanValue); - -_ERROR_: - - LOGI("LEAVE"); - - return ret; -} - -int mv_inference_configure_input_info_open(mv_inference_h infer, - mv_engine_config_h engine_config) -{ - LOGI("ENTER"); - - Inference *pInfer = static_cast(infer); - - int ret = MEDIA_VISION_ERROR_NONE; - - int tensorWidth, tensorHeight, tensorDim, tensorCh; - double meanValue, stdValue; - char *node_name = NULL; - int dataType = 0; - - // This should be one. 
only one batch is supported - tensorDim = 1; - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_INPUT_TENSOR_WIDTH, &tensorWidth); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get tensor width"); - goto _ERROR_; - } - - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_INPUT_TENSOR_HEIGHT, &tensorHeight); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get tensor height"); - goto _ERROR_; - } - - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_INPUT_TENSOR_CHANNELS, &tensorCh); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get tensor channels"); - goto _ERROR_; - } - - ret = mv_engine_config_get_double_attribute( - engine_config, MV_INFERENCE_MODEL_MEAN_VALUE, &meanValue); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get meanValue"); - goto _ERROR_; - } - - ret = mv_engine_config_get_double_attribute( - engine_config, MV_INFERENCE_MODEL_STD_VALUE, &stdValue); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get stdValue"); - goto _ERROR_; - } - - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_INPUT_DATA_TYPE, &dataType); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get an input tensor data type"); - goto _ERROR_; - } - - ret = mv_engine_config_get_string_attribute( - engine_config, MV_INFERENCE_INPUT_NODE_NAME, &node_name); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get input node name"); - goto _ERROR_; - } - - pInfer->ConfigureInputInfo( - tensorWidth, tensorHeight, tensorDim, tensorCh, stdValue, meanValue, - dataType, std::vector<std::string>(1, std::string(node_name))); - -_ERROR_: - - if (node_name) { - free(node_name); - node_name = NULL; - } - - LOGI("LEAVE"); - - return ret; -} - -int mv_inference_configure_engine_open(mv_inference_h infer, - mv_engine_config_h engine_config) -{ - LOGI("ENTER"); - - Inference *pInfer = static_cast<Inference *>(infer); - int backendType = 0; - int targetTypes = 0; - int ret = MEDIA_VISION_ERROR_NONE; - - pInfer->SetEngineConfig(engine_config); - - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_BACKEND_TYPE, &backendType); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get inference backend type"); - goto _ERROR_; - } - - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_TARGET_DEVICE_TYPE, &targetTypes); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get inference target type"); - goto _ERROR_; - } - - ret = pInfer->ConfigureBackendType( - (mv_inference_backend_type_e) backendType); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to configure a backend type."); - goto _ERROR_; - } - - bool is_new_version; - - // Check whether the new inference engine framework or the old one is in use. - // The new framework uses different mv_inference_target_type_e enumeration values - // to support multiple inference target devices, so an enumeration value given - // by a user of the old version has to be converted to the new value; this is - // done internally by ConfigureTargetTypes. - // Note: this function will be dropped, together with the deprecated code, in the - // version after next of Tizen. - ret = check_mv_inference_engine_version(engine_config, &is_new_version); - if (ret != MEDIA_VISION_ERROR_NONE) - goto _ERROR_; - - // Create an inference-engine-common class object and load its corresponding library. - // Note: the inference engine obtains its capability from the given backend through - // the Bind call, so mBackendCapacity must be accessed only after Bind. 
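-	// For reference, the legacy-to-device conversion performed by ConfigureTargetTypes
-	// is effectively the following one-to-one mapping (a sketch; only the CPU and
-	// CUSTOM values appear in this file, the GPU pairing is assumed):
-	//   MV_INFERENCE_TARGET_CPU    -> MV_INFERENCE_TARGET_DEVICE_CPU
-	//   MV_INFERENCE_TARGET_GPU    -> MV_INFERENCE_TARGET_DEVICE_GPU
-	//   MV_INFERENCE_TARGET_CUSTOM -> MV_INFERENCE_TARGET_DEVICE_CUSTOM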
- ret = pInfer->Bind(); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to bind a backend engine."); - goto _ERROR_; - } - - if (is_new_version) { - // Use the new device type as-is. - ret = pInfer->ConfigureTargetDevices(targetTypes); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Tried to configure invalid target types."); - goto _ERROR_; - } - } else { - // Convert the old type to the new one and then use it. - ret = pInfer->ConfigureTargetTypes(targetTypes); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Tried to configure invalid target types."); - goto _ERROR_; - } - } - - LOGI("LEAVE"); -_ERROR_: - return ret; -} - -int mv_inference_configure_output_open(mv_inference_h infer, - mv_engine_config_h engine_config) -{ - LOGI("ENTER"); - - Inference *pInfer = static_cast<Inference *>(infer); - - int maxOutput = 0; - int ret = MEDIA_VISION_ERROR_NONE; - - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_OUTPUT_MAX_NUMBER, &maxOutput); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get the maximum number of inference outputs"); - goto _ERROR_; - } - - pInfer->ConfigureOutput(maxOutput); - - LOGI("LEAVE"); -_ERROR_: - return ret; -} - -int mv_inference_configure_confidence_threshold_open( - mv_inference_h infer, mv_engine_config_h engine_config) -{ - LOGI("ENTER"); - - Inference *pInfer = static_cast<Inference *>(infer); - - double threshold = 0; - int ret = MEDIA_VISION_ERROR_NONE; - - ret = mv_engine_config_get_double_attribute( - engine_config, MV_INFERENCE_CONFIDENCE_THRESHOLD, &threshold); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get inference confidence threshold value"); - goto _ERROR_; - } - - pInfer->ConfigureThreshold(threshold); - - LOGI("LEAVE"); -_ERROR_: - return ret; -} - -int mv_inference_configure_post_process_info_open( - mv_inference_h infer, mv_engine_config_h engine_config) -{ - LOGI("ENTER"); - - Inference *pInfer = static_cast<Inference *>(infer); - - int maxOutput = 0; - double threshold = 0; - int ret = MEDIA_VISION_ERROR_NONE; - - ret = mv_engine_config_get_int_attribute( - engine_config, MV_INFERENCE_OUTPUT_MAX_NUMBER, &maxOutput); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get the maximum number of inference outputs"); - goto _ERROR_; - } - - pInfer->ConfigureOutput(maxOutput); - - ret = mv_engine_config_get_double_attribute( - engine_config, MV_INFERENCE_CONFIDENCE_THRESHOLD, &threshold); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get inference confidence threshold value"); - goto _ERROR_; - } - - pInfer->ConfigureThreshold(threshold); - - LOGI("LEAVE"); -_ERROR_: - return ret; -} - -int mv_inference_configure_output_info_open(mv_inference_h infer, - mv_engine_config_h engine_config) -{ - LOGI("ENTER"); - - Inference *pInfer = static_cast<Inference *>(infer); - - int ret = MEDIA_VISION_ERROR_NONE; - int idx = 0; - char **node_names = NULL; - int size = 0; - std::vector<std::string> names; - ret = mv_engine_config_get_array_string_attribute( - engine_config, MV_INFERENCE_OUTPUT_NODE_NAMES, &node_names, &size); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get output node names"); - goto _ERROR_; - } - - for (idx = 0; idx < size; ++idx) - names.push_back(std::string(node_names[idx])); - - pInfer->ConfigureOutputInfo(names); - -_ERROR_: - - if (node_names) { - for (idx = 0; idx < size; ++idx) { - free(node_names[idx]); - } - free(node_names); - node_names = NULL; - } - - LOGI("LEAVE"); - - return ret; -} - -int mv_inference_prepare_open(mv_inference_h infer) -{ - LOGI("ENTER"); - - Inference *pInfer = static_cast<Inference *>(infer); - - int ret = MEDIA_VISION_ERROR_NONE; - - // Pass parameters needed to load model files to a 
backend engine. - ret = pInfer->Prepare(); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to prepare inference"); - return ret; - } - - // Request to load model files to a backend engine. - ret = pInfer->Load(); - if (ret != MEDIA_VISION_ERROR_NONE) - LOGE("Fail to load model files."); - - LOGI("LEAVE"); - - return ret; -} - -int mv_inference_foreach_supported_engine_open( - mv_inference_h infer, mv_inference_supported_engine_cb callback, - void *user_data) -{ - LOGI("ENTER"); - - Inference *pInfer = static_cast(infer); - - int ret = MEDIA_VISION_ERROR_NONE; - - //bool isSupported = false; - //char str[1024] = {'\0'}; - std::pair backend; - for (int i = 0; i < MV_INFERENCE_BACKEND_MAX; ++i) { - backend = pInfer->GetSupportedInferenceBackend(i); - callback((backend.first).c_str(), backend.second, user_data); - } - - LOGI("LEAVE"); - - return ret; -} - -int mv_inference_image_classify_open( - mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, - mv_inference_image_classified_cb classified_cb, void *user_data) -{ - Inference *pInfer = static_cast(infer); - - int ret = MEDIA_VISION_ERROR_NONE; - int numberOfOutputs = 0; - std::vector sources; - std::vector rects; - - sources.push_back(source); - - if (roi != NULL) - rects.push_back(*roi); - - ret = pInfer->Run(sources, rects); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to run inference"); - return ret; - } - - ImageClassificationResults classificationResults; - - ret = pInfer->GetClassficationResults(&classificationResults); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get inference results"); - return ret; - } - - numberOfOutputs = classificationResults.number_of_classes; - - int *indices = classificationResults.indices.data(); - float *confidences = classificationResults.confidences.data(); - static const int START_CLASS_NUMBER = 10; - static std::vector names(START_CLASS_NUMBER); - - if (numberOfOutputs > START_CLASS_NUMBER) - names.resize(numberOfOutputs); - - LOGE("mv_inference_open: number_of_classes: %d\n", - classificationResults.number_of_classes); - - for (int n = 0; n < numberOfOutputs; ++n) { - LOGE("names: %s", classificationResults.names[n].c_str()); - names[n] = classificationResults.names[n].c_str(); - } - - classified_cb(source, numberOfOutputs, indices, names.data(), confidences, - user_data); - - return ret; -} - -int mv_inference_object_detect_open(mv_source_h source, mv_inference_h infer, - mv_inference_object_detected_cb detected_cb, - void *user_data) -{ - Inference *pInfer = static_cast(infer); - - int ret = MEDIA_VISION_ERROR_NONE; - int numberOfOutputs = 0; - std::vector sources; - std::vector rects; - - sources.push_back(source); - - ret = pInfer->Run(sources, rects); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to run inference"); - return ret; - } - - ObjectDetectionResults objectDetectionResults; - ret = pInfer->GetObjectDetectionResults(&objectDetectionResults); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get inference results"); - return ret; - } - - numberOfOutputs = objectDetectionResults.number_of_objects; - - int *indices = objectDetectionResults.indices.data(); - float *confidences = objectDetectionResults.confidences.data(); - static const int START_OBJECT_NUMBER = 20; - static std::vector names(START_OBJECT_NUMBER); - static std::vector locations(START_OBJECT_NUMBER); - - if (numberOfOutputs > START_OBJECT_NUMBER) { - names.resize(numberOfOutputs); - locations.resize(numberOfOutputs); - } - - for (int n = 0; n < numberOfOutputs; ++n) { - 
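-		// NOTE: names[] keeps the raw c_str() pointers owned by the local
-		// objectDetectionResults; they remain valid only while this function
-		// runs, which is sufficient because detected_cb is invoked before returning.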
LOGE("names: %s", objectDetectionResults.names[n].c_str()); - names[n] = objectDetectionResults.names[n].c_str(); - - locations[n].point.x = objectDetectionResults.locations[n].x; - locations[n].point.y = objectDetectionResults.locations[n].y; - locations[n].width = objectDetectionResults.locations[n].width; - locations[n].height = objectDetectionResults.locations[n].height; - } - - detected_cb(source, numberOfOutputs, indices, names.data(), confidences, - locations.data(), user_data); - - return ret; -} - -int mv_inference_face_detect_open(mv_source_h source, mv_inference_h infer, - mv_inference_face_detected_cb detected_cb, - void *user_data) -{ - Inference *pInfer = static_cast(infer); - - int ret = MEDIA_VISION_ERROR_NONE; - int numberOfOutputs = 0; - std::vector sources; - std::vector rects; - - sources.push_back(source); - - ret = pInfer->Run(sources, rects); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to run inference"); - return ret; - } - - FaceDetectionResults faceDetectionResults; - ret = pInfer->GetFaceDetectionResults(&faceDetectionResults); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get inference results"); - return ret; - } - - numberOfOutputs = faceDetectionResults.number_of_faces; - - float *confidences = faceDetectionResults.confidences.data(); - std::vector locations(numberOfOutputs); - - for (int n = 0; n < numberOfOutputs; ++n) { - locations[n].point.x = faceDetectionResults.locations[n].x; - locations[n].point.y = faceDetectionResults.locations[n].y; - locations[n].width = faceDetectionResults.locations[n].width; - locations[n].height = faceDetectionResults.locations[n].height; - } - - detected_cb(source, numberOfOutputs, confidences, locations.data(), - user_data); - - return ret; -} - -int mv_inference_facial_landmark_detect_open( - mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, - mv_inference_facial_landmark_detected_cb detected_cb, void *user_data) -{ - Inference *pInfer = static_cast(infer); - - int ret = MEDIA_VISION_ERROR_NONE; - int numberOfLandmarks = 0; - std::vector sources; - std::vector rects; - - sources.push_back(source); - - if (roi != NULL) - rects.push_back(*roi); - - ret = pInfer->Run(sources, rects); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to run inference"); - return ret; - } - - FacialLandMarkDetectionResults facialLandMarkDetectionResults; - ret = pInfer->GetFacialLandMarkDetectionResults( - &facialLandMarkDetectionResults); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get inference results"); - return ret; - } - - numberOfLandmarks = facialLandMarkDetectionResults.number_of_landmarks; - - std::vector locations(numberOfLandmarks); - - for (int n = 0; n < numberOfLandmarks; ++n) { - locations[n].x = facialLandMarkDetectionResults.locations[n].x; - locations[n].y = facialLandMarkDetectionResults.locations[n].y; - } - - detected_cb(source, numberOfLandmarks, locations.data(), user_data); - - return ret; -} - -int mv_inference_pose_landmark_detect_open( - mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, - mv_inference_pose_landmark_detected_cb detected_cb, void *user_data) -{ - Inference *pInfer = static_cast(infer); - - int ret = MEDIA_VISION_ERROR_NONE; - std::vector sources; - std::vector rects; - - unsigned int width, height; - ret = mv_source_get_width(source, &width); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get width"); - return ret; - } - - ret = mv_source_get_height(source, &height); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get 
height"); - return ret; - } - - sources.push_back(source); - - if (roi != NULL) - rects.push_back(*roi); - - ret = pInfer->Run(sources, rects); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to run inference"); - return ret; - } - - mv_inference_pose_result_h result = NULL; - ret = pInfer->GetPoseLandmarkDetectionResults( - &result, width, height); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to get inference results"); - return ret; - } - - mv_inference_pose_s *tmp = static_cast(result); - for (int pose = 0; pose < tmp->number_of_poses; ++pose) { - for (int index = 0; index < tmp->number_of_landmarks_per_pose; ++index) { - LOGI("PoseIdx[%2d]: x[%d], y[%d], score[%.3f]", index, - tmp->landmarks[pose][index].point.x, - tmp->landmarks[pose][index].point.y, - tmp->landmarks[pose][index].score); - } - } - - detected_cb(source, result, user_data); - - return ret; -} - -int mv_inference_pose_get_number_of_poses_open( - mv_inference_pose_result_h result, - int *number_of_poses) -{ - mv_inference_pose_s *handle = static_cast(result); - - *number_of_poses = handle->number_of_poses; - - LOGI("%d", *number_of_poses); - - return MEDIA_VISION_ERROR_NONE; -} - -int mv_inference_pose_get_number_of_landmarks_open( - mv_inference_pose_result_h result, - int *number_of_landmarks) -{ - mv_inference_pose_s *handle = static_cast(result); - - *number_of_landmarks = handle->number_of_landmarks_per_pose; - - LOGI("%d", *number_of_landmarks); - - return MEDIA_VISION_ERROR_NONE; -} - -int mv_inference_pose_get_landmark_open( - mv_inference_pose_result_h result, - int pose_index, - int part_index, - mv_point_s *location, - float *score) -{ - mv_inference_pose_s *handle = static_cast(result); - - if (pose_index < 0 || pose_index >= handle->number_of_poses) - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - - if (part_index < 0 || part_index >= handle->number_of_landmarks_per_pose) - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - - *location = handle->landmarks[pose_index][part_index].point; - - *score = handle->landmarks[pose_index][part_index].score; - - LOGI("[%d]:(%dx%d) - %.4f", pose_index, location->x, location->y, *score); - - return MEDIA_VISION_ERROR_NONE; -} - -int mv_inference_pose_get_label_open( - mv_inference_pose_result_h result, - int pose_index, - int *label) -{ - mv_inference_pose_s *handle = static_cast(result); - - if (pose_index < 0 || pose_index >= handle->number_of_poses) - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - - *label = handle->landmarks[pose_index][0].label; - - LOGI("[%d]: label(%d)", pose_index, *label); - - return MEDIA_VISION_ERROR_NONE; -} - -int mv_pose_create_open(mv_pose_h *pose) -{ - if (pose == NULL) { - LOGE("Handle can't be created because handle pointer is NULL"); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - (*pose) = static_cast(new (std::nothrow) Posture); - - if (*pose == NULL) { - LOGE("Failed to create pose handle"); - return MEDIA_VISION_ERROR_OUT_OF_MEMORY; - } - - LOGD("Inference handle [%p] has been created", *pose); - - return MEDIA_VISION_ERROR_NONE; -} - -int mv_pose_destroy_open(mv_pose_h pose) -{ - if (!pose) { - LOGE("Hand can't be destroyed because handle is NULL"); - return MEDIA_VISION_ERROR_INVALID_PARAMETER; - } - - LOGD("Destroy pose handle [%p]", pose); - delete static_cast(pose); - LOGD("Pose handle has been destroyed"); - - return MEDIA_VISION_ERROR_NONE; -} - -int mv_pose_set_from_file_open(mv_pose_h pose, - const char *motionCaptureFilePath, - const char *motionMappingFilePath) -{ - Posture *pPose = static_cast(pose); - - 
int ret = MEDIA_VISION_ERROR_NONE; - - // check file - if (access(motionCaptureFilePath, F_OK) || access(motionMappingFilePath, F_OK)) { - LOGE("Invalid Motion Capture file path [%s]", motionCaptureFilePath); - LOGE("Invalid Motion Mapping file path [%s]", motionMappingFilePath); - - return MEDIA_VISION_ERROR_INVALID_PATH; - } - - ret = pPose->setPoseFromFile(std::string(motionCaptureFilePath), - std::string(motionMappingFilePath)); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to setPoseFromFile"); - return ret; - } - - return MEDIA_VISION_ERROR_NONE; -} - -int mv_pose_compare_open(mv_pose_h pose, mv_inference_pose_result_h action, int parts, float *score) -{ - Posture *pPose = static_cast(pose); - - int ret = MEDIA_VISION_ERROR_NONE; - - std::vector> actionParts; - - mv_inference_pose_s *pAction = static_cast(action); - - for (int k = 0; k < HUMAN_POSE_MAX_LANDMARKS; ++k) { - if (pAction->landmarks[0][k].point.x == -1 || pAction->landmarks[0][k].point.y == -1) { - actionParts.push_back(std::make_pair(false, cv::Point(-1,-1))); - continue; - } - - actionParts.push_back(std::make_pair(true, cv::Point(pAction->landmarks[0][k].point.x, - pAction->landmarks[0][k].point.y))); - - } - - ret = pPose->compare(parts, actionParts, score); - if (ret != MEDIA_VISION_ERROR_NONE) { - LOGE("Fail to compare"); - return ret; - } - - LOGD("score: %1.4f", *score); - - return MEDIA_VISION_ERROR_NONE; -} diff --git a/mv_machine_learning/CMakeLists.txt b/mv_machine_learning/CMakeLists.txt new file mode 100644 index 00000000..a941dd48 --- /dev/null +++ b/mv_machine_learning/CMakeLists.txt @@ -0,0 +1,4 @@ +project(mv_machine_learning) +cmake_minimum_required(VERSION 2.6) + +ADD_SUBDIRECTORY(mv_inference) diff --git a/mv_machine_learning/mv_inference/CMakeLists.txt b/mv_machine_learning/mv_inference/CMakeLists.txt new file mode 100644 index 00000000..1f034f2d --- /dev/null +++ b/mv_machine_learning/mv_inference/CMakeLists.txt @@ -0,0 +1,8 @@ +project(mv_inference_port) +cmake_minimum_required(VERSION 2.6) + +if(MEDIA_VISION_INFERENCE_LICENSE_PORT) + add_subdirectory(${PROJECT_SOURCE_DIR}/inference_lic) # Licensed port +else() + add_subdirectory(${PROJECT_SOURCE_DIR}/inference) # Open port +endif() diff --git a/mv_machine_learning/mv_inference/inference/CMakeLists.txt b/mv_machine_learning/mv_inference/inference/CMakeLists.txt new file mode 100644 index 00000000..f8685d60 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/CMakeLists.txt @@ -0,0 +1,44 @@ +project(mv_inference) +CMAKE_MINIMUM_REQUIRED(VERSION 2.6) + +SET_PROPERTY(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DEBUG _DEBUG) + +SET(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR}) +SET(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR}) +SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +INCLUDE_DIRECTORIES("${INC_DIR}") +INCLUDE_DIRECTORIES("${PROJECT_SOURCE_DIR}/include") +INCLUDE_DIRECTORIES("${PROJECT_SOURCE_DIR}/src") + +SET(dependents "inference-engine-interface-common iniparser json-glib-1.0") +INCLUDE(FindPkgConfig) +pkg_check_modules(${fw_name} REQUIRED ${dependents}) +FOREACH(flag ${${fw_name}_CFLAGS}) + SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} ${flag}") + SET(EXTRA_CXXFLAGS "${EXTRA_CXXFLAGS} ${flag}") +ENDFOREACH(flag) + + +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXXFLAGS} -Wno-unused-parameter -Wno-sign-compare") +FILE(GLOB MV_INFERENCE_INCLUDE_LIST "${PROJECT_SOURCE_DIR}/include/*.h" "${PROJECT_SOURCE_DIR}/include/*.hpp") +FILE(GLOB MV_INFERENCE_SOURCE_LIST 
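+# Note: FILE(GLOB) does not track files added later, so re-run CMake after adding
+# sources. Headers are globbed into the target only so IDEs index them.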
"${PROJECT_SOURCE_DIR}/src/*.c" "${PROJECT_SOURCE_DIR}/src/*.cpp") + +FIND_PACKAGE(OpenCV REQUIRED core dnn imgproc) +if(NOT OpenCV_FOUND) + MESSAGE(SEND_ERROR "OpenCV NOT FOUND") + RETURN() +else() + INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS}) +endif() + + +if(FORCED_STATIC_BUILD) + ADD_LIBRARY(${PROJECT_NAME} STATIC ${MV_INFERENCE_INCLUDE_LIST} ${MV_INFERENCE_SOURCE_LIST}) +else() + ADD_LIBRARY(${PROJECT_NAME} SHARED ${MV_INFERENCE_INCLUDE_LIST} ${MV_INFERENCE_SOURCE_LIST}) +endif() + +TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${MV_COMMON_LIB_NAME} ${OpenCV_LIBS} inference-engine-interface-common dlog iniparser json-glib-1.0) + +INSTALL(TARGETS ${PROJECT_NAME} DESTINATION ${LIB_INSTALL_DIR}) diff --git a/mv_machine_learning/mv_inference/inference/include/Bvh.h b/mv_machine_learning/mv_inference/inference/include/Bvh.h new file mode 100644 index 00000000..6b9b8533 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/Bvh.h @@ -0,0 +1,108 @@ +#ifndef __MEDIA_VISION_BVH_H__ +#define __MEDIA_VISION_BVH_H__ + +#include +#include +#include "Joint.h" +#include +#include + +namespace mediavision +{ +namespace inference +{ + + /** Class created for storing motion data from bvh file */ + class Bvh { + public: + /** Constructor of Bvh object + * @details Initializes local variables + */ + Bvh() : num_frames_(0), frame_time_(0), num_channels_(0) {} + + /** + * Recalculation of local transformation matrix for each frame in each joint + * + * Should be called to set local_transformation_matrix vectors in joints + * structures. + * + * @param start_joint A joint of which each child local transformation + * matrix will be recalculated, as default it is NULL which will be resolved + * to root_joint in method body + */ + void recalculate_joints_ltm(std::shared_ptr start_joint = NULL); + + /** Adds joint to Bvh object + * @details Adds joint and increases number of data channels + * @param joint The joint that will be added + */ + void add_joint(const std::shared_ptr joint) { + joints_.push_back(joint); + num_channels_ += joint->num_channels(); + } + + /** Gets the root joint + * @return The root joint + */ + const std::shared_ptr root_joint() const { return root_joint_; } + + /** Gets all joints + * @return The all joints + */ + const std::vector > joints() const { + return joints_; + } + + /** Gets the number of data frames + * @return The number of frames + */ + unsigned num_frames() const { return num_frames_; } + + /** Gets the frame time + * @return The single frame time (in second) + */ + double frame_time() const { return frame_time_; } + + /** Gets the total number of channels + * @return The number of data channels + */ + unsigned num_channels() const { return num_channels_; } + + /** Sets the root joint + * @param arg The root joint to be set + */ + void set_root_joint(const std::shared_ptr arg) { root_joint_ = arg; } + + /** Sets the all joint at once + * @param arg The all joints to be set + */ + void set_joints(const std::vector > arg) { + joints_ = arg; + } + + /** Sets the number of data frames + * @param arg The number of frames to be set + */ + void set_num_frames(const unsigned arg) { num_frames_ = arg; } + + /** Sets the single data frame time + * @param arg The time of frame to be set + */ + void set_frame_time(const double arg) { frame_time_ = arg; } + + private: + /** A root joint in this bvh file */ + std::shared_ptr root_joint_; + /** All joints in file in order of parse */ + std::vector > joints_; + /** A number of motion frames in this bvh file */ + unsigned 
num_frames_; + /** A time of single frame */ + double frame_time_; + /** Number of channels of all joints */ + unsigned num_channels_; + }; + +} // namespace +} +#endif // __MEDIA_VISION_BVH_H__ diff --git a/mv_machine_learning/mv_inference/inference/include/BvhParser.h b/mv_machine_learning/mv_inference/inference/include/BvhParser.h new file mode 100644 index 00000000..c96bedb3 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/BvhParser.h @@ -0,0 +1,78 @@ +#ifndef __MEDIA_VISION_BVH_PARSER_H__ +#define __MEDIA_VISION_BVH_PARSER_H__ + +#include "Bvh.h" +#include "Joint.h" + +#include +#include +#include +#include + +namespace mediavision +{ +namespace inference +{ + + /** Bvh Parser class that is responsible for parsing .bvh file */ + class BvhParser { + public: + BvhParser() : bvh_(NULL) {}; + ~BvhParser() = default; + + /** Parses single bvh file and stored data into bvh structure + * @param path The path to file to be parsed + * @param bvh The pointer to bvh object where parsed data will be stored + * @return 0 if success, -1 otherwise + */ + int parse(const std::string& path, Bvh* bvh); + + private: + /** Parses single hierarchy in bvh file + * @param file The input stream that is needed for reading file content + * @return 0 if success, -1 otherwise + */ + int parse_hierarchy(std::ifstream& file); + + /** Parses joint and its children in bvh file + * @param file The input stream that is needed for reading file content + * @param parent The pointer to parent joint + * @param parsed The output parameter, here will be stored parsed joint + * @return 0 if success, -1 otherwise + */ + int parse_joint(std::ifstream& file, std::shared_ptr parent, + std::shared_ptr & parsed); + + /** Parses order of channel for single joint + * @param file The input stream that is needed for reading file content + * @param joint The pointer to joint that channels order will be parsed + * @return 0 if success, -1 otherwise + */ + int parse_channel_order(std::ifstream& file, std::shared_ptr joint); + + /** Parses motion part data + * @param file The input stream that is needed for reading file content + * @return 0 if success, -1 otherwise + */ + int parse_motion(std::ifstream& file); + + /** Trims the string, removes leading and trailing whitespace from it + * @param s The string, which leading and trailing whitespace will be + * trimmed + */ + inline void trim(std::string &s) { + s.erase( std::remove_if( s.begin(), s.end(), + std::bind( std::isspace, std::placeholders::_1, + std::locale::classic() ) ), s.end() ); + } + + + /** The path to file that was parsed previously */ + std::string path_; + + /** The bvh object to store parsed data */ + Bvh* bvh_; + }; +} +} // namespace +#endif // __MEDIA_VISION_BVH_PARSER_H__ diff --git a/mv_machine_learning/mv_inference/inference/include/BvhUtils.h b/mv_machine_learning/mv_inference/inference/include/BvhUtils.h new file mode 100644 index 00000000..9d1a131b --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/BvhUtils.h @@ -0,0 +1,40 @@ +#ifndef __MEDIA_VISION_BVH_UTILS_H__ +#define __MEDIA_VISION_BVH_UTILS_H__ + +#include +#include + +#include + +#include +#include +namespace mediavision +{ +namespace inference +{ + +/** Enumeration class for axis */ +enum class Axis { + X, + Y, + Z +}; + +/** Creates rotation matrix + * @param angle The rotation angle + * @param axis The rotation axis + * @return The rotation matrix + */ +cv::Mat rotation_matrix(float angle, Axis axis); + +/** Rotates matrix + * @param matrix The matrix to be 
rotated + * @param angle The rotation angle + * @param axis The rotation axis + * @return The rotation matrix + */ +cv::Mat rotate(cv::Mat matrix, float angle, Axis axis); + +} // namespace +} +#endif //__MEDIA_VISION_BVH_UTILS_H__ \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/include/Inference.h b/mv_machine_learning/mv_inference/inference/include/Inference.h new file mode 100644 index 00000000..6c88b953 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/Inference.h @@ -0,0 +1,388 @@ +/** + * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_INFERENCE_H__ +#define __MEDIA_VISION_INFERENCE_H__ + +#include +#include + +#include "mv_common.h" +#include "inference_engine_error.h" +#include "inference_engine_common_impl.h" +#include "mv_inference_private.h" +#include +#include +#include +#include "Metadata.h" +#include "PreProcess.h" +#include "PostProcess.h" +#include "TensorBuffer.h" + +#define HUMAN_POSE_MAX_LANDMARKS 16 +#define HUMAN_POSE_MAX_PARTS 6 + +/** + * @file Inference.h + * @brief This file contains the inference class definition which + * provides inference interface. + */ +using namespace InferenceEngineInterface::Common; + +typedef struct _ImageClassficationResults { + int number_of_classes; + std::vector indices; + std::vector names; + std::vector confidences; +} ImageClassificationResults; /**< structure ImageClassificationResults */ + +typedef struct _ObjectDetectionResults { + int number_of_objects; + std::vector indices; + std::vector names; + std::vector confidences; + std::vector locations; +} ObjectDetectionResults; /**< structure ObjectDetectionResults */ + +typedef struct _FaceDetectionResults { + int number_of_faces; + std::vector confidences; + std::vector locations; +} FaceDetectionResults; /**< structure FaceDetectionResults */ + +typedef struct _FacialLandMarkDetectionResults { + int number_of_landmarks; + std::vector locations; +} FacialLandMarkDetectionResults; /**< structure FacialLandMarkDetectionResults */ + +typedef struct _PoseLandmarkDetectionResults { + int number_of_landmarks; + std::vector locations; + std::vector score; +} PoseLandmarkDetectionResults; /**< structure PoseLandmarkDetectionResults */ + +namespace mediavision +{ +namespace inference +{ + struct TensorInfo { + int width; + int height; + int dim; + int ch; + }; + + struct InferenceConfig { + /** + * @brief Default constructor for the @ref InferenceConfig + * + * @since_tizen 5.0 + */ + InferenceConfig(); + + std::string mConfigFilePath; /**< Path of a model configuration file */ + + std::string mWeightFilePath; /**< Path of a model weight file */ + + std::string mUserFilePath; /**< Path of model user file */ + + TensorInfo mTensorInfo; /**< Tensor information */ + + mv_inference_data_type_e mDataType; /**< Data type of a input tensor */ + + mv_inference_backend_type_e mBackedType; /**< Backed type of model files */ + + int 
mTargetTypes; /**< Target type to run inference */ + + double mConfidenceThresHold; /**< Confidence threshold value */ + + double mMeanValue; /**< The mean value for normalization */ + + double mStdValue; /**< The scale factor value for normalization */ + + int mMaxOutputNumbers; + + std::vector mInputLayerNames; /**< The input layer names */ + std::vector mOutputLayerNames; /**< The output layer names */ + }; + + class Inference + { + public: + /** + * @brief Creates an Inference class instance. + * + * @since_tizen 5.5 + */ + Inference(); + + /** + * @brief Destroys an Inference class instance including + * its all resources. + * + * @since_tizen 5.5 + */ + ~Inference(); + + /** + * @brief Configure modelfiles + * + * @since_tizen 5.5 + */ + void ConfigureModelFiles(const std::string modelConfigFilePath, + const std::string modelWeightFilePath, + const std::string modelUserFilePath); + + /** + * @brief Configure input tensor information + * + * @since_tizen 5.5 + * @remarks deprecated Replayced by ConfigureInputInfo + */ + void ConfigureTensorInfo(int width, int height, int dim, int ch, + double stdValue, double meanValue); + + /** + * @brief Configure input information + * + * @since_tizen 6.0 + */ + void ConfigureInputInfo(int width, int height, int dim, int ch, + double stdValue, double meanValue, int dataType, + const std::vector names); + + void ConfigureOutputInfo(std::vector names); + + /** + * @brief Configure inference backend type. + * + * @since_tizen 6.0 + */ + int ConfigureBackendType(const mv_inference_backend_type_e backendType); + + /** + * @brief Configure a inference target device type such as CPU, GPU or NPU. (only one type can be set) + * @details Internally, a given device type will be converted to new type. + * This API is just used for backward compatibility. + * + * @since_tizen 6.0 (Deprecated) + */ + int ConfigureTargetTypes(const int targetType); + + /** + * @brief Configure inference target devices such as CPU, GPU or NPU. (one more types can be combined) + * + * @since_tizen 6.0 + */ + int ConfigureTargetDevices(const int targetDevices); + + /** + * @brief Configure the maximum number of inference results + * + * @since_tizen 5.5 + */ + void ConfigureOutput(const int maxOutputNumbers); + + /** + * @brief Configure the confidence threshold + * + * @since_tizen 5.5 + */ + void ConfigureThreshold(const double threshold); + + /** + * @brief Parses the metadata file path + * + * @since_tizen 6.5 + */ + int ParseMetadata(const std::string filePath); + + /** + * @brief Bind a backend engine + * @details Use this function to bind a backend engine for the inference. + * This creates a inference engine common class object, and loads a backend + * library which inferfaces with a Neural Network runtime such as TF Lite, + * OpenCV, ARMNN and so on. + * + * Ps. The created inference engine common object will be released and its + * corresponding backend library will be unbound when deconstructor + * of Inference class will be called. + * + * @since_tizen 6.0 + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + */ + int Bind(); + + /** + * @brief Set default configuration for the inference + * @details Use this function to set default configuration given in json file by user. + * + * Ps. this callback should be called after Bind callback. 
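+	 * An illustrative bring-up sequence (a sketch; paths are placeholders and
+	 * error handling is omitted):
+	 * @code
+	 * Inference infer;
+	 * infer.ConfigureModelFiles(configPath, weightPath, labelPath);
+	 * infer.ConfigureBackendType(MV_INFERENCE_BACKEND_TFLITE);
+	 * infer.ConfigureTargetDevices(MV_INFERENCE_TARGET_DEVICE_CPU);
+	 * infer.Bind();    // load and bind the backend library
+	 * infer.Prepare(); // apply the configuration to the backend
+	 * infer.Load();    // load the model files
+	 * @endcode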
+ * + * @since_tizen 6.0 + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + */ + int Prepare(); + + /** + * @brief Load model files + * @details Use this function to load given model files for the inference. + * + * Ps. this callback should be called after Prepare callback. + * + * @since_tizen 6.0 + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + */ + int Load(); + + /** + * @brief Runs inference with a region of a given image + * @details Use this function to run forward pass with the given image. + * The given image is preprocessed and the region of the image is + * thrown to neural network. Then, the output tensor is returned. + * If roi is NULL, then full source will be analyzed. + * + * @since_tizen 5.5 + * @return @c true on success, otherwise a negative error value + */ + int Run(std::vector &mvSources, + std::vector &rects); + + /** + * @brief Gets that given engine is supported or not + * + * @since_tizen 5.5 + * @return @c true on success, otherwise a negative error value + */ + std::pair GetSupportedInferenceBackend(int backend); + + /** + * @brief Gets the ImageClassificationResults + * + * @since_tizen 5.5 + * @return @c true on success, otherwise a negative error value + */ + int GetClassficationResults(ImageClassificationResults *classificationResults); + + /** + * @brief Gets the ObjectDetectioResults + * + * @since_tizen 5.5 + * @return @c true on success, otherwise a negative error value + */ + int GetObjectDetectionResults(ObjectDetectionResults *detectionResults); + + /** + * @brief Gets the FaceDetectioResults + * + * @since_tizen 5.5 + * @return @c true on success, otherwise a negative error value + */ + int GetFaceDetectionResults(FaceDetectionResults *detectionResults); + + /** + * @brief Gets the FacialLandmarkDetectionResults + * + * @since_tizen 5.5 + * @return @c true on success, otherwise a negative error value + */ + int GetFacialLandMarkDetectionResults(FacialLandMarkDetectionResults *results); + + /** + * @brief Gets the PoseLandmarkDetectionResults + * + * @since_tizen 6.0 + * @return @c true on success, otherwise a negative error value + */ + int GetPoseLandmarkDetectionResults(mv_inference_pose_result_h *detectionResults, + int width, int height); + + int GetResults(std::vector > *dimInfo, + std::vector *results); + + mv_engine_config_h GetEngineConfig(void) + { + return engine_config; + } + + void SetEngineConfig(mv_engine_config_h config) + { + engine_config = config; + } + + int GetTargetType() + { + return mConfig.mTargetTypes; + } + + private: + bool mCanRun; /**< The flag indicating ready to run Inference */ + InferenceConfig mConfig; + inference_engine_capacity mBackendCapacity; + std::map > mSupportedInferenceBackend; + cv::Size mInputSize; + int mCh; + int mDim; + double mDeviation; + double mMean; + double mThreshold; + int mOutputNumbers; + cv::Size mSourceSize; + cv::Mat mInputBuffer; + mv_engine_config_h engine_config; + InferenceEngineCommon *mBackend; + std::map mModelFormats; + std::vector mUserListName; + //std::map mInputTensorBuffers; + TensorBuffer mInputTensorBuffers; + inference_engine_layer_property mInputLayerProperty; + //std::map mOutputTensorBuffers; + TensorBuffer mOutputTensorBuffers; + inference_engine_layer_property mOutputLayerProperty; + + mv_inference_pose_s 
*mPoseResult; + + Metadata mMetadata; + PreProcess mPreProc; + PostProcess mPostProc; + + private: + void CheckSupportedInferenceBackend(); + int ConvertEngineErrorToVisionError(int error); + int ConvertTargetTypes(int given_types); + int ConvertToCv(int given_type); + inference_tensor_data_type_e ConvertToIE(int given_type); + int Preprocess(cv::Mat cvImg, cv::Mat cvDst, int data_type); + int PrepareTenosrBuffers(void); + void CleanupTensorBuffers(void); + int SetUserFile(std::string filename); + int FillOutputResult(tensor_t &outputData); + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_INFERENCE_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/InferenceIni.h b/mv_machine_learning/mv_inference/inference/include/InferenceIni.h new file mode 100644 index 00000000..7a586148 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/InferenceIni.h @@ -0,0 +1,72 @@ +/** + * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_INFERENCE_INI_H__ +#define __MEDIA_VISION_INFERENCE_INI_H__ + +#include +#include +#include + +namespace mediavision +{ +namespace inference +{ + class InferenceInI + { + public: + /** + * @brief Creates an Inference class instance. + * + * @since_tizen 5.5 + */ + InferenceInI(); + + /** + * @brief Destroys an Inference class instance including + * its all resources. + * + * @since_tizen 5.5 + */ + ~InferenceInI(); + + /** + * @brief Load() + * + * @since_tizen 5.5 + */ + int LoadInI(); + + /** + * @brief Unload() + * + * @since_tizen 5.5 + */ + void UnLoadInI(); + + std::vector GetSupportedInferenceEngines(); + + private: + std::vector mSupportedInferenceBackend; + std::string mIniDefaultPath; + std::string mDefaultBackend; + std::string mDelimeter; + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_INFERENCE_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/InputMetadata.h b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h new file mode 100644 index 00000000..01da01cb --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/InputMetadata.h @@ -0,0 +1,127 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MEDIA_VISION_INPUTMETADATA_H__ +#define __MEDIA_VISION_INPUTMETADATA_H__ + +#include +#include +#include + +#include +#include +#include + +/** + * @file InputMetadata.h + * @brief This file contains the metadata class definition which + * provides metadata of a model. + */ + +namespace mediavision +{ +namespace inference +{ + class Options + { + public: + class Normalization + { + public: + bool use; + std::vector mean; + std::vector std; + + Normalization() : use(false) {} + ~Normalization() = default; + }; + + class Quantization + { + public: + bool use; + std::vector scale; + std::vector zeropoint; + + Quantization() : use(false) {}; + ~Quantization() = default; + }; + + Normalization normalization; + Quantization quantization; + + Options() = default; + ~Options() = default; + }; + + class LayerInfo + { + public: + + std::string name; + std::vector dims; + mv_colorspace_e colorSpace; + mv_inference_data_type_e dataType; + inference_tensor_shape_type_e shapeType; // TODO: define mv_inference_shape_type_e + + LayerInfo() = default; + ~LayerInfo() = default; + + int GetWidth() const; + int GetHeight() const; + int GetChannel() const; + }; + + class InputMetadata + { + public: + bool parsed; + std::map layer; + std::map option; + + /** + * @brief Creates an InputMetadata class instance. + * + * @since_tizen 6.5 + */ + InputMetadata() : parsed(false) {}; + + /** + * @brief Destroys an InputMetadata class instance including + * its all resources. + * + * @since_tizen 6.5 + */ + ~InputMetadata() = default; + + /** + * @brief Parses an InputMetadata + * + * @since_tizen 6.5 + */ + int Parse(JsonObject *root); + + private: + int GetTensorInfo(JsonObject* root); + int GetPreProcess(JsonObject* root); + mv_colorspace_e ConvertTypeToMD(const std::string& type); + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_INPUTMETADATA_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/Joint.h b/mv_machine_learning/mv_inference/inference/include/Joint.h new file mode 100644 index 00000000..d28a70c2 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/Joint.h @@ -0,0 +1,232 @@ +#ifndef __MEDIA_VISION_JOINT_H__ +#define __MEDIA_VISION_JOINT_H__ + +#include +#include +#include +#include + +namespace mediavision +{ +namespace inference +{ + + /** Class created for storing single joint data from bvh file */ + class Joint { + public: + /** A struct that keep offset of joint in relation to parent */ + struct Offset { + float x; + float y; + float z; + }; + + /** A enumeration type useful for set order of channels for every joint */ + enum class Channel { + XPOSITION, + YPOSITION, + ZPOSITION, + ZROTATION, + XROTATION, + YROTATION + }; + + /** A string names for each channel */ + const std::vector channel_name_str = { + "XPOSITION", + "YPOSITION", + "ZPOSITION", + "ZROTATION", + "XROTATION", + "YROTATION" + }; + + /** Adds single frame motion data + * @param data The motion data to be added + */ + void add_frame_motion_data(const std::vector & data) { + channel_data_.push_back(data); + } + + /** Gets the parent joint of this joint + * @return The parent joint + */ + std::shared_ptr parent() const { return parent_; } + + /** Gets the name of this joint + * @return The joint's name + */ + std::string name() const { return name_; } + + /** Gets the offset of this joint + * @return The joint's offset + */ + Offset offset() const { return offset_; } + + /** Gets the channels order of this joint + * @return The joint's channels order + */ + 
std::vector channels_order() const { + return channels_order_; + } + + /** Gets the all children joints of this joint + * @return The joint's children + */ + std::vector > children() const { + return children_; + } + + /** Gets the channels data of this joint for all frames + * @return The joint's channel data + */ + const std::vector >& channel_data() const { + return channel_data_; + } + + /** Gets the channel data of this joint for selected frame + * @param frame The frame for which channel data will be returned + * @return The joint's channel data for selected frame + */ + const std::vector & channel_data(unsigned frame) const { + return channel_data_[frame]; + } + + /** Gets the channel data of this joint for selected frame and channel + * @param frame The frame for which channel data will be returned + * @param channel_num The number of channel which data will be returned + * @return The joint's channel data for selected frame and channel + */ + float channel_data(unsigned frame, unsigned channel_num) const { + return channel_data_[frame][channel_num]; + } + + /** Gets the local transformation matrix for this joint for all frames + * @return The joint's local transformation matrix + */ + std::vector ltm() const { + return ltm_; + } + + /** Gets the local transformation matrix for this joint for selected frame + * @param frame The frame for which ltm will be returned + * @return The joint's local transformation matrix for selected frame + */ + cv::Mat ltm(unsigned frame) const { + return ltm_[frame]; + } + + /** Gets the position for this joint for all frames + * @return The joint's position + */ + std::vector pos() const { + return pos_; + } + + /** Gets the position for this joint for selected frame + * @param frame The frame for which ltm will be returned + * @return The joint's position for selected frame + */ + cv::Vec3f pos(unsigned frame) const { + return pos_[frame]; + } + + /** Gets the number of channels of this joint + * @return The joint's channels number + */ + unsigned num_channels() const { return channels_order_.size(); } + + /** Sets the this joint parent joint + * @param arg The parent joint of this joint + */ + void set_parent(const std::shared_ptr arg) { parent_ = arg; } + + /** Sets the this joint name + * @param arg The name of this joint + */ + void set_name(const std::string arg) { name_ = arg; } + + /** Sets the this joint offset + * @param arg The offset of this joint + */ + void set_offset(const Offset arg) { offset_ = arg; } + + /** Sets the this joint channels order + * @param arg The channels order of this joint + */ + void set_channels_order(const std::vector & arg) { + channels_order_ = arg; + } + + /** Sets the this joint children + * @param arg The children of this joint + */ + void set_children(const std::vector >& arg) { + children_ = arg; + } + + /** Sets the this joint channels data + * @param arg The channels data of this joint + */ + void set_channel_data(const std::vector >& arg) { + channel_data_ = arg; + } + + /** Sets local transformation matrix for selected frame + * @param matrix The local transformation matrix to be set + * @param frame The number of frame for which you want set ltm. As + * default it is set to 0. 
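+	 * If frame is 0 or not less than the number of stored frames, the
+	 * matrix is appended; otherwise the entry at frame is replaced.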
+ */ + void set_ltm(const cv::Mat matrix, unsigned frame = 0) { + if (frame > 0 && frame < ltm_.size()) + ltm_[frame] = matrix; + else + ltm_.push_back(matrix); + } + + /** Sets local transformation matrix for selected frame + * @param pos The position of joint in selected frame to be set + * @param frame The number of frame for which you want set position. As + * default it is set to 0. + */ + void set_pos(const cv::Vec3f pos, unsigned frame = 0) { + if (frame > 0 && frame < pos_.size()) + pos_[frame] = pos; + else + pos_.push_back(pos); + } + + /** Gets channels name of this joint + * @return The joint's channels name + */ + const std::vector get_channels_name() const { + std::vector channel_names; + + for (int i = 0; i < channels_order_.size(); i++) + channel_names.push_back(channel_name_str[static_cast( + channels_order_[i])]); + + return channel_names; + } + + private: + /** Parent joint in file hierarchy */ + std::shared_ptr parent_; + std::string name_; + Offset offset_; + /** Order of joint's input channels */ + std::vector channels_order_; + /** Pointers to joints that are children of this in hierarchy */ + std::vector > children_; + /** Structure for keep joint's channel's data. + * Each vector keep data for one channel. + */ + std::vector > channel_data_; + /** Local transformation matrix for each frame */ + std::vector ltm_; + /** Vector x, y, z of joint position for each frame */ + std::vector pos_; + }; +} +} // namespace +#endif // __MEDIA_VISION_JOINT_H__ diff --git a/mv_machine_learning/mv_inference/inference/include/Metadata.h b/mv_machine_learning/mv_inference/inference/include/Metadata.h new file mode 100644 index 00000000..ecf9ef6a --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/Metadata.h @@ -0,0 +1,90 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_METADATA_H__ +#define __MEDIA_VISION_METADATA_H__ + +#include +#include + +#include "mv_common.h" +#include "mv_inference_private.h" +#include +#include "InputMetadata.h" +#include "OutputMetadata.h" +#include + +/** + * @file Metadata.h + * @brief This file contains the metadata class definition which + * provides metadata of a model. + */ + +namespace mediavision +{ +namespace inference +{ + class Metadata + { + public: + /** + * @brief Creates an Metadata class instance. + * + * @since_tizen 6.5 + */ + Metadata() = default; + + /** + * @brief Destroys an Metadata class instance including + * its all resources. 
+ * + * @since_tizen 6.5 + */ + ~Metadata() = default; + + /** + * @brief Initializes an Metadata class + * + * @since_tizen 6.5 + */ + int Init(const std::string& filename); + + /** + * @brief Parses a metafile and set values to InputMetadata + * and OutputMetadata + * + * @since_tizen 6.5 + */ + int Parse(); + + InputMetadata& GetInputMeta(); + OutputMetadata& GetOutputMeta(); + + private: + int ParseInputMeta(JsonObject *object); + int ParseOutputMeta(JsonObject *object); + + private: + std::string mMetafile; + + InputMetadata mInputMeta; + OutputMetadata mOutputMeta; + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_METADATA_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h b/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h new file mode 100755 index 00000000..f5324f22 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/ObjectDecoder.h @@ -0,0 +1,80 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_OBJECTDECODER_H__ +#define __MEDIA_VISION_OBJECTDECODER_H__ + +#include +#include +#include +#include +#include + +#include "TensorBuffer.h" +#include "OutputMetadata.h" +#include "PostProcess.h" + +/** + * @file ObjectDecoder.h + * @brief This file contains the ObjectDecoder class definition which + * provides object decoder. + */ + +namespace mediavision +{ +namespace inference +{ + class ObjectDecoder + { + private: + TensorBuffer mTensorBuffer; + OutputMetadata mMeta; + int mBoxOffset; + int mNumberOfOjects; + + ScoreInfo& mScoreInfo; + BoxInfo& mBoxInfo; + + float mScaleW; + float mScaleH; + + Boxes mResultBoxes; + + float decodeScore(int idx); + Box decodeBox(int idx, float score, int label = -1); + Box decodeBoxWithAnchor(int idx, int anchorIdx, float score, cv::Rect2f& anchor); + + public: + ObjectDecoder(TensorBuffer& buffer, OutputMetadata& metaData, + int boxOffset, float scaleW, float scaleH, int numberOfObjects = 0) : + mTensorBuffer(buffer), mMeta(metaData), + mBoxOffset(boxOffset), mNumberOfOjects(numberOfObjects), + mScoreInfo(mMeta.GetScore()), mBoxInfo(mMeta.GetBox()), + mScaleW(scaleW), mScaleH(scaleH), + mResultBoxes() { + }; + + ~ObjectDecoder() = default; + + int init(); + int decode(); + Boxes& getObjectAll(); + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_OBJECTDECODER_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h new file mode 100644 index 00000000..f311ee41 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/OutputMetadata.h @@ -0,0 +1,255 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_OUTPUTMETADATA_H__ +#define __MEDIA_VISION_OUTPUTMETADATA_H__ + +#include +#include +#include +#include + +#include +#include +#include + +/** + * @file OutputMetadata.h + * @brief This file contains the metadata class definition which + * provides metadata of a model. + */ + +namespace mediavision +{ +namespace inference +{ + class DimInfo + { + private: + std::vector dims; + public: + std::vector GetValidIndexAll() const; + void SetValidIndex(int index); + }; + + class DeQuantization + { + private: + double scale; + double zeropoint; + public: + DeQuantization(double s, double z) : scale(s), zeropoint(z) {}; + ~DeQuantization() = default; + + double GetScale() { return scale; } + double GetZeroPoint() { return zeropoint; } + }; + + class ScoreInfo + { + private: + std::string name; + DimInfo dimInfo; + double threshold; + int type; + int topNumber; + std::shared_ptr deQuantization; + + public: + ScoreInfo() = default; + ~ScoreInfo() = default; + + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + double GetThresHold() { return threshold; } + int GetType() { return type; } + int GetTopNumber() { return topNumber; } + std::shared_ptr GetDeQuant() { return deQuantization; } + + int ParseScore(JsonObject *root); + }; + + class BoxInfo + { + public: + class DecodeInfo { + public: + class AnchorParam { + public: + int mode; /**< 0: generate anchor, 1:load pre-anchor*/ + int numLayers; + float minScale; + float maxScale; + int inputSizeHeight; + int inputSizeWidth; + float anchorOffsetX; + float anchorOffsetY; + std::vector strides; + std::vector aspectRatios; + bool isReduceBoxedInLowestLayer; + float interpolatedScaleAspectRatio; + bool isFixedAnchorSize; + bool isExponentialBoxScale; + float xScale; + float yScale; + float wScale; + float hScale; + + AnchorParam() = default; + ~AnchorParam() = default; + }; + + class NMSParam { + public: + int mode; /**< 0: IOU */ + float threshold; + + NMSParam() : mode(-1), threshold(0.2f) {}; + ~NMSParam() = default; + }; + + private: + AnchorParam anchorParam; + std::vector anchorBoxes; + NMSParam nmsParam; + + public: + DecodeInfo() = default; + ~DecodeInfo() = default; + std::vector& GetAnchorBoxAll(); + bool IsAnchorBoxEmpty(); + void AddAnchorBox(cv::Rect2f& ahcnor); + void ClearAnchorBox(); + + // Anchor param + int ParseAnchorParam(JsonObject *root); + int GenerateAnchor(); + bool IsFixedAnchorSize(); + bool IsExponentialBoxScale(); + float GetAnchorXscale(); + float GetAnchorYscale(); + float GetAnchorWscale(); + float GetAnchorHscale(); + float CalculateScale(float min, float max, int index, int maxStride); + + // Nms param + int ParseNms(JsonObject *root); + int GetNmsMode(); + float GetNmsThreshold(); + }; + + private: + std::string name; + DimInfo dimInfo; + int type; // 0:LTRB, 1: CxCyWH + std::vector order; // Order based on box type + int coordinate; // 0: ratio, 1: pixel + int decodingType; // 0: post-op, 1: achorbox(ssd), 2:yolo(?) 
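+		// For reference, NMS mode 0 ("IOU") keeps the highest-score box and drops
+		// any candidate whose intersection-over-union with an accepted box,
+		// area(a&b) / (area(a) + area(b) - area(a&b)), exceeds the threshold
+		// (see NMSParam above and PostProcess::Nms).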
+ DecodeInfo decodingInfo; + + public: + BoxInfo() = default; + ~BoxInfo() = default; + + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + int GetType() { return type; } + std::vector GetOrder() { return order; } + int GetCoordinate() { return coordinate; } + int GetDecodingType() { return decodingType; } + DecodeInfo& GetDecodeInfo() {return decodingInfo; } + + int ParseBox(JsonObject *root); + }; + + class Label + { + private: + std::string name; + DimInfo dimInfo; + + public: + Label() = default; + ~Label() = default; + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + + int ParseLabel(JsonObject *root); + }; + + class Number + { + private: + std::string name; + DimInfo dimInfo; + + public: + Number() = default; + ~Number() = default; + std::string GetName() { return name; } + DimInfo GetDimInfo() { return dimInfo; } + + int ParseNumber(JsonObject *root); + }; + + class OutputMetadata + { + private: + bool parsed; + ScoreInfo score; + BoxInfo box; + Label label; + Number number; + + int ParseScore(JsonObject *root); + int ParseBox(JsonObject *root); + int ParseLabel(JsonObject *root); + int ParseNumber(JsonObject *root); + int ParseBoxDecodeInfo(JsonObject *root); + + public: + /** + * @brief Creates an OutputMetadata class instance. + * + * @since_tizen 6.5 + */ + OutputMetadata() : parsed(false) {}; + + /** + * @brief Destroys an OutputMetadata class instance including + * its all resources. + * + * @since_tizen 6.5 + */ + ~OutputMetadata() = default; + + /** @brief Parses an OutputMetadata + * + * @since_tizen 6.5 + */ + int Parse(JsonObject *root); + + bool IsParsed(); + ScoreInfo& GetScore(); + BoxInfo& GetBox(); + Label& GetLabel(); + Number& GetNumber(); + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_OUTPUTMETADATA_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/PostProcess.h b/mv_machine_learning/mv_inference/inference/include/PostProcess.h new file mode 100644 index 00000000..fbf64be1 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/PostProcess.h @@ -0,0 +1,98 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_POSTPROCESS_H__ +#define __MEDIA_VISION_POSTPROCESS_H__ + +#include +#include +#include + +#include "mv_common.h" +#include "OutputMetadata.h" + +#include +#include + + +/** + * @file PostProcess.h + * @brief This file contains the PostProcess class definition which + * provides PostProcess after running inference. + */ + +/** + * @brief Box structure. + * @details Box structure includes index, score, location. 
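+ *
+ * For illustration only (values invented): a decoded detection might be
+ * stored as Box { 15, 0.87f, cv::Rect2f(0.1f, 0.2f, 0.4f, 0.4f) }.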
+ * + */ +typedef struct _Box { + int index; /**< index of box belonging to a category */ + float score; /**< score of box belonging to the index */ + cv::Rect2f location; /**< location of a box */ +} Box; + +using Boxes = std::vector; +using BoxesList = std::vector; + +namespace mediavision +{ +namespace inference +{ + class PostProcess + { + public: + /** + * @brief Creates a PostProcess class instance. + * + * @since_tizen 6.5 + */ + PostProcess() : mMaxScoreSize(3) {}; + + /** + * @brief Destroys a PostProcess class instance including + * all its resources. + * + * @since_tizen 6.5 + */ + ~PostProcess() = default; + + /** + * @brief Calculates sigmoid. + * + * @since_tizen 6.5 + */ + static float sigmoid(float value); + static float dequant(float value, float scale, float zeropoint); + + int ScoreClear(int size); + int ScorePush(float value, int index); + int ScorePop(std::vector>& top); + int Nms(BoxesList& boxeslist, int mode, float threshold, Boxes& nmsboxes); + + private: + std::priority_queue, + std::vector>, + std::greater>> mScore; + private: + int mMaxScoreSize; + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_POSTPROCESS_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/Posture.h b/mv_machine_learning/mv_inference/inference/include/Posture.h new file mode 100644 index 00000000..4c67fdcd --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/Posture.h @@ -0,0 +1,95 @@ +/** + * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_POSE_H__ +#define __MEDIA_VISION_POSE_H__ + +#include +#include + +#include "mv_common.h" +#include "Inference.h" +#include "Bvh.h" +#include "BvhParser.h" +#include +#include +#include + +/** + * @file Posture.h + * @brief This file contains the pose class definition + */ + +namespace mediavision +{ +namespace inference +{ + class Posture + { + public: + /** + * @brief Creates a Posture class instance. + * + * @since_tizen 6.0 + */ + Posture(); + + /** + * @brief Destroys a Posture class instance including + * all its resources.
+ * + * @since_tizen 6.0 + */ + ~Posture(); + + /** + * @brief Sets the motion capture file path and the pose mapping file path + * + * @since_tizen 6.0 + */ + int setPoseFromFile(const std::string motionCaptureFilePath, + const std::string motionMappingFilePath); + + /** + * @brief Compares a pose for @a parts and returns the score + * + * @since_tizen 6.0 + */ + int compare(int parts, std::vector> action, + float* score); + + private: + cv::Vec2f getUnitVectors(cv::Point point1, cv::Point point2); + int getParts(int parts, + std::vector>& pose, + std::vector>>& posePart); + float getSimilarity(int parts, + std::vector>>& posePart, + std::vector>>& actionPart); + float cosineSimilarity(std::vector vec1, std::vector vec2, int size); + + private: + BvhParser mBvhParser; + Bvh mBvh; + std::map mMotionToPoseMap; /**< name, index */ + std::vector> mPose; + std::vector>> mPoseParts; + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_POSE_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/PreProcess.h b/mv_machine_learning/mv_inference/inference/include/PreProcess.h new file mode 100644 index 00000000..f4c002bb --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/PreProcess.h @@ -0,0 +1,77 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_PREPROCESS_H__ +#define __MEDIA_VISION_PREPROCESS_H__ + +#include +#include + +#include "mv_common.h" +#include "InputMetadata.h" + +#include +#include + + +/** + * @file PreProcess.h + * @brief This file contains the PreProcess class definition which + * provides PreProcess before running inference. + */ + +namespace mediavision +{ +namespace inference +{ + class PreProcess + { + public: + /** + * @brief Creates a PreProcess class instance. + * + * @since_tizen 6.5 + */ + PreProcess() = default; + + /** + * @brief Destroys a PreProcess class instance including + * all its resources.
+ * + * @since_tizen 6.5 + */ + ~PreProcess() = default; + + /** + * @brief Runs PreProcess with layerInfo and options + * + * @since_tizen 6.5 + */ + int Run(cv::Mat& source, const int colorSpace, const int dataType, const LayerInfo& layerInfo, + const Options& options, void* buffer); + + private: + int Resize(cv::Mat& source, cv::Mat& dest, cv::Size size); + int ColorConvert(cv::Mat& source, cv::Mat& dest, int sType, int dType); + int Normalize(cv::Mat& source, cv::Mat& dest, + const std::vector& mean, const std::vector& std); + + }; + +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_PREPROCESS_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/TensorBuffer.h b/mv_machine_learning/mv_inference/inference/include/TensorBuffer.h new file mode 100644 index 00000000..9054ec7b --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/TensorBuffer.h @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MEDIA_VISION_TENSORBUFFER_H__ +#define __MEDIA_VISION_TENSORBUFFER_H__ + +#include +#include +#include +#include "mv_common.h" +#include "mv_inference_private.h" +#include +#include + + +/** + * @file TensorBuffer.h + * @brief This file contains the tensor buffer class definition which + * provides name and inference_engine_tensor_buffer. + */ + +using IETensorBuffer = std::map; +namespace mediavision +{ +namespace inference +{ + class TensorBuffer + { + private: + IETensorBuffer mTensorBuffer; + + public: + TensorBuffer() = default; + ~TensorBuffer() = default; + + bool empty(); + bool exist(std::string name); + void clear(); + size_t size(); + + IETensorBuffer& getAllTensorBuffer(); + inference_engine_tensor_buffer* getTensorBuffer(std::string name); + bool setTensorBuffer(std::string name, inference_engine_tensor_buffer& buffer); + + template + T getValue(std::string name, int idx); + }; +} /* Inference */ +} /* MediaVision */ + +#endif /* __MEDIA_VISION_TENSOR_BUFFER_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/include/mv_inference_open.h b/mv_machine_learning/mv_inference/inference/include/mv_inference_open.h new file mode 100644 index 00000000..7f22ac98 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/include/mv_inference_open.h @@ -0,0 +1,706 @@ +/** + * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MEDIA_VISION_INFERENCE_OPEN_H__ +#define __MEDIA_VISION_INFERENCE_OPEN_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + /** + * @file mv_inference_open.h + * @brief This file contains the Media Vision Inference Open API. + */ + + /*************/ + /* Inference */ + /*************/ + + mv_engine_config_h mv_inference_get_engine_config(mv_inference_h infer); + + /** + * @brief Create inference handle. + * @details Use this function to create an inference handle. After creation + * the inference handle has to be prepared with + * the @ref mv_inference_prepare() function. + * + * @since_tizen 5.5 + * + * @param [out] infer The handle to the inference to be created + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * + * @post Release @a infer by using + * @ref mv_inference_destroy() function when it is not needed + * anymore + * + * @see mv_inference_destroy_open() + * @see mv_inference_prepare_open() + */ + int mv_inference_create_open(mv_inference_h *infer); + + /** + * @brief Destroy inference handle and release all its resources. + * + * @since_tizen 5.5 + * + * @param [in] infer The handle to the inference to be destroyed + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * + * @pre Create an inference handle by using @ref mv_inference_create_open() + * + * @see mv_inference_create_open() + */ + int mv_inference_destroy_open(mv_inference_h infer); + + /** + * @brief Configure the inference model data to inference handle + * + * @since_tizen 5.5 + * + * @param [in] infer The handle to the inference + * @param [in] engine_config The handle to the configuration of + * engine. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ + int mv_inference_configure_model_open(mv_inference_h infer, + mv_engine_config_h engine_config); + + /** + * @brief Configure the tensor information to the inference handle + * + * @since_tizen 5.5 + * @remarks deprecated Replaced by mv_inference_configure_input_info + * + * @param [in] infer The handle to the inference + * @param [in] engine_config The handle to the configuration of + * engine.
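+ *
+ * A usage sketch (illustrative; the attribute keys and @c infer handle are
+ * assumed from the public Media Vision inference API, not defined here):
+ * @code
+ * mv_engine_config_h cfg;
+ * mv_create_engine_config(&cfg);
+ * mv_engine_config_set_int_attribute(cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, 224);
+ * mv_engine_config_set_int_attribute(cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 224);
+ * mv_inference_configure_input_info_open(infer, cfg);
+ * @endcode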
+ * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ + + int + mv_inference_configure_input_info_open(mv_inference_h infer, + mv_engine_config_h engine_config); + + /** + * @brief Configure the input information to the inference handle + * + * @since_tizen 6.0 + * + * @param [in] infer The handle to the inference + * @param [in] engine_config The handle to the configuration of + * engine. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ + int + mv_inference_configure_input_info_open(mv_inference_h infer, + mv_engine_config_h engine_config); + + /** + * @brief Configure the backend to the inference handle + * + * @since_tizen 5.5 + * + * @param [in] infer The handle to the inference + * @param [in] engine_config The handle to the configuration of + * engine. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ + int mv_inference_configure_engine_open(mv_inference_h infer, + mv_engine_config_h engine_config); + + /** + * @brief Configure the number of output to the inference handle + * + * @since_tizen 5.5 + * @remarks deprecated Replaced by mv_inference_configure_post_process_info_open + * + * @param [in] infer The handle to the inference + * @param [in] engine_config The handle to the configuration of + * engine. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ + int mv_inference_configure_output_open(mv_inference_h infer, + mv_engine_config_h engine_config); + + /** + * @brief Configure the confidence threshold value to the inference handle + * + * @since_tizen 5.5 + * @remarks deprecated Replaced by mv_inference_configure_post_process_info_open + * + * @param [in] infer The handle to the inference + * @param [in] engine_config The handle to the configuration of + * engine. 
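+ *
+ * Sketch only (MV_INFERENCE_CONFIDENCE_THRESHOLD is assumed from the
+ * public inference API; @c cfg and @c infer are pre-created handles):
+ * @code
+ * mv_engine_config_set_double_attribute(cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.6);
+ * mv_inference_configure_confidence_threshold_open(infer, cfg);
+ * @endcode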
+ * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ + int mv_inference_configure_confidence_threshold_open( + mv_inference_h infer, mv_engine_config_h engine_config); + + /** + * @brief Configure the post process information to the inference handle + * + * @since_tizen 6.0 + * + * @param [in] infer The handle to the inference + * @param [in] engine_config The handle to the configuration of + * engine. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ + int mv_inference_configure_post_process_info_open( + mv_inference_h infer, mv_engine_config_h engine_config); + + /** + * @brief Configure the set of output node names to the inference handle + * + * @since_tizen 5.5 + * @remarks deprecated Replaced by mv_inference_configure_output_info_open + * + * @param [in] infer The handle to the inference + * @param [in] engine_config The handle to the configuration of + * engine. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ + int mv_inference_configure_output_node_names_open( + mv_inference_h infer, mv_engine_config_h engine_config); + + /** + * @brief Configure the output information to the inference handle + * + * @since_tizen 6.0 + * + * @param [in] infer The handle to the inference + * @param [in] engine_config The handle to the configuration of + * engine. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + */ + int + mv_inference_configure_output_info_open(mv_inference_h infer, + mv_engine_config_h engine_config); + + /** + * @brief Prepare inference. + * @details Use this function to prepare inference based on + * the configured network.
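+ * A typical call order, following the preconditions documented for the
+ * detection functions below (sketch only; @c cfg is an already-populated
+ * mv_engine_config_h):
+ * @code
+ * mv_inference_h infer;
+ * mv_inference_create_open(&infer);
+ * mv_inference_configure_model_open(infer, cfg);
+ * mv_inference_configure_engine_open(infer, cfg);
+ * mv_inference_prepare_open(infer);
+ * @endcode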
+ * + * @since_tizen 5.5 + * + * @param [in] infer The handle to the inference + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of model data + * in @a engine_config + * @retval #MEDIA_VISION_ERROR_INVALID_DATA Invalid model data + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + */ + int mv_inference_prepare_open(mv_inference_h infer); + + /** + * @brief Traverses the list of supported engines for inference. + * @details Using this function the supported engines can be obtained. + * The names can be used with mv_engine_config_h related + * getters and setters to get/set MV_INFERENCE_BACKEND_TYPE attribute + * value. + * + * @since_tizen 5.5 + * @param [in] infer The handle to the inference + * @param [in] callback The iteration callback function + * @param [in] user_data The user data to be passed to the callback function + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * + * @pre @a engine_cfg must be created + * + * @see mv_engine_config_set_string_attribute() + * @see mv_engine_config_get_string_attribute() + */ + int mv_inference_foreach_supported_engine_open( + mv_inference_h infer, mv_inference_supported_engine_cb callback, + void *user_data); + + /** + * @brief Performs image classification on the @a source + * @details Use this function to launch image classification. + * Each time when mv_inference_image_classify is + * called, @a classified_cb will receive classes + * which the media source may belong to. + * + * @since_tizen 5.5 + * + * @param [in] source The handle to the source of the media + * @param [in] infer The handle to the inference + * @param [in] roi Rectangular box bounding the region-of-interest on the + * @a source. If NULL, then full source will be + * analyzed. + * @param [in] classified_cb The callback which will be called for + * classification on media source. + * This callback will receive classification results. + * @param [in] user_data The user data passed from the code where + * @ref mv_inference_image_classify_open() is invoked. This data will + * be accessible from @a classified_cb callback. 
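+ *
+ * A minimal call sketch (the callback signature is abbreviated and the
+ * callback body is illustrative):
+ * @code
+ * static void on_classified(mv_source_h source, int number_of_classes,
+ *                           const int *indices, const char **names,
+ *                           const float *confidences, void *user_data)
+ * {
+ *     // consume the classification results here
+ * }
+ *
+ * mv_inference_image_classify_open(source, infer, NULL, on_classified, NULL);
+ * @endcode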
+ * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace + * isn't supported + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * + * @pre Create a source handle by calling @ref mv_create_source() + * @pre Create an inference handle by calling @ref mv_inference_create() + * @pre Configure an inference handle by calling @ref mv_inference_configure() + * @pre Prepare an inference by calling @ref mv_inference_prepare() + * @post @a classified_cb will be called to process classification results + * + * @see mv_inference_image_classified_cb + */ + int mv_inference_image_classify_open( + mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + mv_inference_image_classified_cb classified_cb, void *user_data); + + /** + * @brief Performs object detection on the @a source + * @details Use this function to launch object detection. + * Each time when mv_inference_object_detection is + * called, @a detected_cb will receive a list of objects and their locations + * on the media source. + * + * @since_tizen 5.5 + * + * @param [in] source The handle to the source of the media + * @param [in] infer The handle to the inference + * @param [in] detected_cb The callback which will be called for + * detecting objects on media source. + * This callback will receive the detection results. + * @param [in] user_data The user data passed from the code where + * @ref mv_inference_object_detect() is invoked. This data will + * be accessible from @a detected_cb callback. + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace + * isn't supported + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * + * @pre Create a source handle by calling @ref mv_create_source() + * @pre Create an inference handle by calling @ref mv_inference_create() + * @pre Configure an inference handle by calling @ref mv_inference_configure() + * @pre Prepare an inference by calling @ref mv_inference_prepare() + * @post @a detected_cb will be called to process detection results + * + * @see mv_inference_object_detected_cb + */ + int + mv_inference_object_detect_open(mv_source_h source, mv_inference_h infer, + mv_inference_object_detected_cb detected_cb, + void *user_data); + + /** + * @brief Performs face detection on the @a source + * @details Use this function to launch face detection. + * Each time when mv_inference_face_detection is + * called, @a detected_cb will receive a list of faces and their locations + * on the media source. + * + * @since_tizen 5.5 + * + * @param [in] source The handle to the source of the media + * @param [in] infer The handle to the inference + * @param [in] detected_cb The callback which will be called for + * detecting faces on media source. + * This callback will receive the detection results. + * @param [in] user_data The user data passed from the code where + * @ref mv_inference_face_detect() is invoked. This data will + * be accessible from @a detected_cb callback. 
+ * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace + * isn't supported + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * + * @pre Create a source handle by calling @ref mv_create_source() + * @pre Create an inference handle by calling @ref mv_inference_create() + * @pre Configure an inference handle by calling @ref mv_inference_configure() + * @pre Prepare an inference by calling @ref mv_inference_prepare() + * @post @a detected_cb will be called to process detection results + * + * @see mv_inference_face_detected_cb + */ + int mv_inference_face_detect_open(mv_source_h source, mv_inference_h infer, + mv_inference_face_detected_cb detected_cb, + void *user_data); + + /** + * @brief Performs facial landmarks detection on the @a source + * @details Use this function to launch facial landmark detection. + * Each time mv_inference_facial_landmark_detect() is + * called, @a detected_cb will receive a list of facial landmark locations + * on the media source. + * + * @since_tizen 5.5 + * + * @param [in] source The handle to the source of the media + * @param [in] infer The handle to the inference + * @param[in] roi Rectangular box bounding face image on the + * @a source. If NULL, then full source will be + * analyzed. + * @param [in] detected_cb The callback which will be called for + * detecting facial landmark on media source. + * This callback will receive the detection results. + * @param [in] user_data The user data passed from the code where + * @ref mv_inference_facial_landmark_detect() is invoked. + * This data will be accessible from @a detected_cb callback. + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace + * isn't supported + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * + * @pre Create a source handle by calling @ref mv_create_source() + * @pre Create an inference handle by calling @ref mv_inference_create() + * @pre Configure an inference handle by calling @ref mv_inference_configure() + * @pre Prepare an inference by calling @ref mv_inference_prepare() + * @post @a detected_cb will be called to process detection results + * + * @see mv_inference_facial_landmark_detected_cb + */ + int mv_inference_facial_landmark_detect_open( + mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + mv_inference_facial_landmark_detected_cb detected_cb, + void *user_data); + + /** + * @brief Performs pose landmarks detection on the @a source. + * @details Use this function to launch pose landmark detection. + * Each time mv_inference_pose_landmark_detect_open() is + * called, @a detected_cb will receive a list of pose landmark locations + * in the media source. + * + * @since_tizen 6.0 + * @remarks This function is synchronous and may take considerable time to run.
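+ *
+ * Sketch of result handling inside the detected callback, using the
+ * getters declared later in this header (illustrative only):
+ * @code
+ * int poses = 0, landmarks = 0;
+ * mv_inference_pose_get_number_of_poses_open(result, &poses);
+ * mv_inference_pose_get_number_of_landmarks_open(result, &landmarks);
+ * for (int p = 0; p < poses; ++p) {
+ *     for (int l = 0; l < landmarks; ++l) {
+ *         mv_point_s location;
+ *         float score = 0.f;
+ *         mv_inference_pose_get_landmark_open(result, p, l, &location, &score);
+ *     }
+ * }
+ * @endcode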
+ * + * @param[in] source The handle to the source of the media + * @param[in] infer The handle to the inference + * @param[in] roi Rectangular area including a face in @a source which + * will be analyzed. If NULL, then the whole source will be + * analyzed. + * @param[in] detected_cb The callback which will receive the detection results. + * @param[in] user_data The user data passed from the code where + * mv_inference_pose_landmark_detect_open() is invoked. + * This data will be accessible in @a detected_cb callback. + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT Source colorspace + * isn't supported + * + * @pre Create a source handle by calling mv_create_source_open() + * @pre Create an inference handle by calling mv_inference_create_open() + * @pre Configure an inference handle by calling mv_inference_configure_open() + * @pre Prepare an inference by calling mv_inference_prepare_open() + * @post @a detected_cb will be called to provide detection results + * + * @see mv_inference_pose_landmark_detected_cb() + */ + int mv_inference_pose_landmark_detect_open( + mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + mv_inference_pose_landmark_detected_cb detected_cb, + void *user_data); + + /** + * @brief Gets the number of poses. + * + * @since_tizen 6.0 + * + * @param[in] result The handle to inference result + * @param[out] number_of_poses The pointer to the number of poses + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + */ + int mv_inference_pose_get_number_of_poses_open( + mv_inference_pose_result_h result, int *number_of_poses); + + /** + * @brief Gets the number of landmarks per pose. + * + * @since_tizen 6.0 + * + * @param[in] result The handle to inference result + * @param[out] number_of_landmarks The pointer to the number of landmarks + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + */ + int mv_inference_pose_get_number_of_landmarks_open( + mv_inference_pose_result_h result, int *number_of_landmarks); + + /** + * @brief Gets landmark location of a part of a pose. + * + * @since_tizen 6.0 + * + * @param[in] result The handle to inference result + * @param[in] pose_index The pose index + * @param[in] part_index The index of the landmark (part) of a pose + * @param[out] location The location of a landmark + * @param[out] score The score of a landmark + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * + */ + int mv_inference_pose_get_landmark_open( + mv_inference_pose_result_h result, int pose_index, int part_index, mv_point_s *location, float *score); + + /** + * @brief Gets a label of a pose.
+ * + * @since_tizen 6.0 + * + * @param[in] result The handle to inference result + * @param[in] pose_index The pose index between 0 and + * the number of poses which can be gotten by + * mv_inference_pose_get_number_of_poses() + * @param[out] label The label of a pose + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * + * @see mv_inference_pose_get_number_of_poses() + * @see mv_inference_pose_get_number_of_landmarks() + * @see mv_inference_pose_landmark_detected_cb() + * @see mv_inference_pose_result_h + */ + int mv_inference_pose_get_label_open( + mv_inference_pose_result_h result, int pose_index, int *label); + + /** + * @brief Creates pose handle. + * @details Use this function to create a pose. + * + * @since_tizen 6.0 + * + * @remarks The @a pose should be released using mv_pose_destroy_open(). + * + * @param[out] pose The handle to the pose to be created + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_OUT_OF_MEMORY Out of memory + * + * @see mv_pose_destroy_open() + */ + int mv_pose_create_open(mv_pose_h *pose); + + /** + * @brief Destroys pose handle and releases all its resources. + * + * @since_tizen 6.0 + * + * @param[in] pose The handle to the pose to be destroyed + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * + * @pre Create pose handle by using mv_pose_create_open() + * + * @see mv_pose_create_open() + */ + int mv_pose_destroy_open(mv_pose_h pose); + + /** + * @brief Sets a motion capture file and its pose mapping file to the pose. + * @details Use this function to set a motion capture file and + * its pose mapping file. These are used by mv_pose_compare_open() + * to compare against a pose detected by mv_inference_pose_landmark_detect_open(). + * + * @since_tizen 6.0 + * @remarks If the app sets paths to media storage, + * then the media storage privilege + * %http://tizen.org/privilege/mediastorage is needed.\n + * If the app sets the paths to external storage, + * then the external storage privilege + * %http://tizen.org/privilege/externalstorage is needed.\n + * If the required privileges aren't set properly, + * mv_pose_set_from_file_open() will return #MEDIA_VISION_ERROR_PERMISSION_DENIED.
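+ *
+ * End-to-end sketch (file paths are placeholders; @c action comes from a
+ * pose landmark detection result, and @c parts is a bitwise OR of
+ * #mv_inference_human_body_part_e values, per mv_pose_compare_open()):
+ * @code
+ * mv_pose_h pose;
+ * mv_pose_create_open(&pose);
+ * mv_pose_set_from_file_open(pose, "/path/to/capture.bvh", "/path/to/mapping.txt");
+ * float score = 0.f;
+ * mv_pose_compare_open(pose, action, parts, &score);
+ * mv_pose_destroy_open(pose);
+ * @endcode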
+ * + * @param[in] pose The handle to the pose + * @param[in] motionCaptureFilePath The file path to the motion capture file + * @param[in] motionMappingFilePath The file path to the motion mapping file + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_PERMISSION_DENIED Permission denied + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_PATH Invalid path of file paths + * @retval #MEDIA_VISION_ERROR_INTERNAL Internal error + */ + int mv_pose_set_from_file_open(mv_pose_h pose, const char *motionCaptureFilePath, const char *motionMappingFilePath); + + /** + * @brief Compares an action pose with the pose which is set by mv_pose_set_from_file_open(). + * @details Use this function to compare action pose with the pose + * which is set by mv_pose_set_from_file_open(). + * Parts to be compared can be selected by #mv_inference_human_body_part_e. + * Their similarity can be given by the score between 0 ~ 1. + * + * @since_tizen 6.0 + * + * @param[in] pose The handle to the pose + * @param[in] action The action pose + * @param[in] parts The parts to be compared + * @param[out] score The similarity score + * + * @return @c 0 on success, otherwise a negative error value + * @retval #MEDIA_VISION_ERROR_NONE Successful + * @retval #MEDIA_VISION_ERROR_NOT_SUPPORTED Not supported + * @retval #MEDIA_VISION_ERROR_INVALID_PARAMETER Invalid parameter + * @retval #MEDIA_VISION_ERROR_INVALID_OPERATION Invalid operation + * + * @pre Sets the pose by using mv_pose_set_from_file() + */ + int mv_pose_compare_open(mv_pose_h pose, mv_inference_pose_result_h action, int parts, float *score); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __MEDIA_VISION_INFERENCE_OPEN_H__ */ diff --git a/mv_machine_learning/mv_inference/inference/src/Bvh.cpp b/mv_machine_learning/mv_inference/inference/src/Bvh.cpp new file mode 100644 index 00000000..80d75dfe --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/Bvh.cpp @@ -0,0 +1,96 @@ +/** + * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include "Bvh.h" +#include "BvhUtils.h" +#include "mv_private.h" +#include + +namespace mediavision +{ +namespace inference +{ + + void Bvh::recalculate_joints_ltm(std::shared_ptr start_joint) { + + LOGI("ENTER"); + + if (start_joint == NULL) + { + if (root_joint_ == NULL) + return; + else + start_joint = root_joint_; + } + + LOGD("%s joint", start_joint->name().c_str()); + //LOG(DEBUG) << "recalculate_joints_ltm: " << start_joint->name(); + cv::Mat offmat_backup = cv::Mat::eye(4,4, CV_32F); + offmat_backup.at(0,3) = start_joint->offset().x; + offmat_backup.at(1,3) = start_joint->offset().y; + offmat_backup.at(2,3) = start_joint->offset().z; + + std::vector> data = start_joint->channel_data(); + + for (int i = 0; i < num_frames_; i++) { + cv::Mat offmat = offmat_backup; // offset matrix + cv::Mat rmat = cv::Mat::eye(4,4,CV_32F); // identity matrix set on rotation matrix + cv::Mat tmat = cv::Mat::eye(4,4,CV_32F); // identity matrix set on translation matrix + + for (int j = 0; j < start_joint->channels_order().size(); j++) { + if (start_joint->channels_order()[j] == Joint::Channel::XPOSITION) + tmat.at(0,3) = data[i][j]; + else if (start_joint->channels_order()[j] == Joint::Channel::YPOSITION) + tmat.at(1,3) = data[i][j]; + else if (start_joint->channels_order()[j] == Joint::Channel::ZPOSITION) + tmat.at(2,3) = data[i][j]; + else if (start_joint->channels_order()[j] == Joint::Channel::XROTATION) + rmat = rotate(rmat, data[i][j], Axis::X); + else if (start_joint->channels_order()[j] == Joint::Channel::YROTATION) + rmat = rotate(rmat, data[i][j], Axis::Y); + else if (start_joint->channels_order()[j] == Joint::Channel::ZROTATION) + rmat = rotate(rmat, data[i][j], Axis::Z); + } + + cv::Mat ltm = cv::Mat::eye(4,4,CV_32F); // local transformation matrix + + if (start_joint->parent() != NULL) + ltm = start_joint->parent()->ltm(i) * offmat; + else + ltm = tmat * offmat; + + cv::Vec3f wPos(ltm.at(0,3),ltm.at(1,3), ltm.at(2,3)); + start_joint->set_pos(wPos); + //LOG(TRACE) << "Joint world position: " << utils::vec3tos(ltm[3]); + LOGD("Joint world position: %f, %f, %f", wPos[0], wPos[1], wPos[2]); + + ltm = ltm * rmat; + + //LOG(TRACE) << "Local transformation matrix: \n" << utils::mat4tos(ltm); + + start_joint->set_ltm(ltm, i); + } // num frame + + for (auto& child : start_joint->children()) { + recalculate_joints_ltm(child); + } + + LOGI("LEAVE"); + } // recalculate_joints_ltm + +} // end of bvh +} diff --git a/mv_machine_learning/mv_inference/inference/src/BvhParser.cpp b/mv_machine_learning/mv_inference/inference/src/BvhParser.cpp new file mode 100644 index 00000000..6205c832 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/BvhParser.cpp @@ -0,0 +1,397 @@ +/** + * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BvhParser.h" +#include "mv_private.h" + +#include +#include +#include +#include + +/** Indicate whether bvh parser allows multi hierarchy or not + * Not fully tested + */ +#define MULTI_HIERARCHY 0 + +namespace { + +const std::string kChannels = "CHANNELS"; +const std::string kEnd = "End"; +const std::string kEndSite = "End Site"; +const std::string kFrame = "Frame"; +const std::string kFrames = "Frames:"; +const std::string kHierarchy = "HIERARCHY"; +const std::string kJoint = "JOINT"; +const std::string kMotion = "MOTION"; +const std::string kOffset = "OFFSET"; +const std::string kRoot = "ROOT"; + +const std::string kXpos = "Xposition"; +const std::string kYpos = "Yposition"; +const std::string kZpos = "Zposition"; +const std::string kXrot = "Xrotation"; +const std::string kYrot = "Yrotation"; +const std::string kZrot = "Zrotation"; + +} + +namespace mediavision +{ +namespace inference +{ + + //############################################################################## + // Main parse function + //############################################################################## + int BvhParser::parse(const std::string& path, Bvh* bvh) { + LOGI("ENTER"); + //LOG(INFO) << "Parsing file : " << path; + + path_ = path; + bvh_ = bvh; + + std::ifstream file; + file.open(path_); + + if (file.is_open()) { + std::string token; + + #if MULTI_HIERARCHY == 1 + while (file.good()) { + #endif + file >> token; + if (token == kHierarchy) { + int ret = parse_hierarchy(file); + if (ret) + return ret; + } else { + //LOG(ERROR) << "Bad structure of .bvh file. " << kHierarchy + // << " should be on the top of the file"; + return -1; + } + #if MULTI_HIERARCHY == 1 + } + #endif + } else { + //LOG(ERROR) << "Cannot open file to parse : " << path_; + return -1; + } + + LOGI("LEAVE"); + return 0; + } + + //############################################################################## + // Function parsing hierarchy + //############################################################################## + int BvhParser::parse_hierarchy(std::ifstream& file) { + //LOG(INFO) << "Parsing hierarchy"; + + std::string token; + int ret; + + if (file.good()) { + file >> token; + + //########################################################################## + // Parsing joints + //########################################################################## + if (token == kRoot) { + std::shared_ptr rootJoint; + ret = parse_joint(file, nullptr, rootJoint); + + if (ret) + return ret; + + LOGI("There is %d data channels", bvh_->num_channels()); + + bvh_->set_root_joint(rootJoint); + } else { + LOGE("Bad structure of .bvh file."); + + return -1; + } + } + + if (file.good()) { + file >> token; + + //########################################################################## + // Parsing motion data + //########################################################################## + if (token == kMotion) { + ret = parse_motion(file); + + if (ret) + return ret; + } else { + LOGE("Bad structure of .bvh file."); + + return -1; + } + } + return 0; + } + + //############################################################################## + // Function parsing joint + //############################################################################## + int BvhParser::parse_joint(std::ifstream& file, + std::shared_ptr parent, std::shared_ptr & parsed) { + + //LOG(TRACE) << "Parsing joint"; + + std::shared_ptr joint = std::make_shared(); + joint->set_parent(parent); + + std::string name; + file >> name; + + LOGD("Joint name %s", 
name.c_str()); + + joint->set_name(name); + + std::string token; + std::vector > children; + int ret; + + file >> token; // Consuming '{' + file >> token; + + //############################################################################ + // Offset parsing + //############################################################################ + if (token == kOffset) { + Joint::Offset offset; + + try { + file >> offset.x >> offset.y >> offset.z; + } catch (const std::ios_base::failure& e) { + //LOG(ERROR) << "Failure while parsing offset"; + return -1; + } + + joint->set_offset(offset); + + //LOG(TRACE) << "Offset x: " << offset.x << ", y: " << offset.y << ", z: " + // << offset.z; + + } else { + //LOG(ERROR) << "Bad structure of .bvh file. Expected " << kOffset << ", but " + // << "found \"" << token << "\""; + + return -1; + } + + file >> token; + + //############################################################################ + // Channels parsing + //############################################################################ + if (token == kChannels) { + ret = parse_channel_order(file, joint); + + //LOG(TRACE) << "Joint has " << joint->num_channels() << " data channels"; + + if (ret) + return ret; + } else { + //LOG(ERROR) << "Bad structure of .bvh file. Expected " << kChannels + // << ", but found \"" << token << "\""; + + return -1; + } + + file >> token; + + bvh_->add_joint(joint); + + //############################################################################ + // Children parsing + //############################################################################ + + while (file.good()) { + //########################################################################## + // Child joint parsing + //########################################################################## + if (token == kJoint) { + std::shared_ptr child; + ret = parse_joint(file, joint, child); + + if (ret) + return ret; + + children.push_back(child); + + //########################################################################## + // Child joint parsing + //########################################################################## + } else if (token == kEnd) { + file >> token >> token; // Consuming "Site {" + + std::shared_ptr tmp_joint = std::make_shared (); + + tmp_joint->set_parent(joint); + tmp_joint->set_name(kEndSite); + children.push_back(tmp_joint); + + file >> token; + + //######################################################################## + // End site offset parsing + //######################################################################## + if (token == kOffset) { + Joint::Offset offset; + + try { + file >> offset.x >> offset.y >> offset.z; + } catch (const std::ios_base::failure& e) { + //LOG(ERROR) << "Failure while parsing offset"; + return -1; + } + + tmp_joint->set_offset(offset); + + // LOG(TRACE) << "Joint name : EndSite"; + // LOG(TRACE) << "Offset x: " << offset.x << ", y: " << offset.y << ", z: " + // << offset.z; + + file >> token; // Consuming "}" + + } else { + //LOG(ERROR) << "Bad structure of .bvh file. Expected " << kOffset + // << ", but found \"" << token << "\""; + + return -1; + } + + bvh_->add_joint(tmp_joint); + //########################################################################## + // End joint parsing + //########################################################################## + } else if (token == "}") { + joint->set_children(children); + parsed = joint; + return 0; + } + + file >> token; + } + + //LOG(ERROR) << "Cannot parse joint, unexpected end of file. 
Last token : " + // << token; + return -1; + } + + //############################################################################## + // Motion data parse function + //############################################################################## + int BvhParser::parse_motion(std::ifstream& file) { + + LOGI("ENTER"); + + std::string token; + file >> token; + + int frames_num; + + if (token == kFrames) { + file >> frames_num; + bvh_->set_num_frames(frames_num); + LOGD("Num of frames: %d", frames_num); + } else { + LOGE("Bad structure of .bvh file"); + + return -1; + } + + file >> token; + + double frame_time; + + if (token == kFrame) { + file >> token; // Consuming 'Time:' + file >> frame_time; + bvh_->set_frame_time(frame_time); + LOGD("Frame time: %f",frame_time); + + float number; + for (int i = 0; i < frames_num; i++) { + for (auto joint : bvh_->joints()) { + std::vector data; + for (int j = 0; j < joint->num_channels(); j++) { + file >> number; + data.push_back(number); + } + LOGD("%s joint", joint->name().c_str()); + joint->add_frame_motion_data(data); + } + } + } else { + LOGE("Bad structure of .bvh file."); + return -1; + } + + LOGI("LEAVE"); + + return 0; + } + + //############################################################################## + // Channels order parse function + //############################################################################## + int BvhParser::parse_channel_order(std::ifstream& file, + std::shared_ptr joint) { + + LOGI("ENTER"); + + int num; + file >> num; + LOGD("Number of channels: %d",num); + + std::vector channels; + std::string token; + + for (int i = 0; i < num; i++) { + file >> token; + if (token == kXpos) + channels.push_back(Joint::Channel::XPOSITION); + else if (token == kYpos) + channels.push_back(Joint::Channel::YPOSITION); + else if (token == kZpos) + channels.push_back(Joint::Channel::ZPOSITION); + else if (token == kXrot) + channels.push_back(Joint::Channel::XROTATION); + else if (token == kYrot) + channels.push_back(Joint::Channel::YROTATION); + else if (token == kZrot) + channels.push_back(Joint::Channel::ZROTATION); + else { + //LOG(ERROR) << "Not valid channel!"; + return -1; + } + } + + joint->set_channels_order(channels); + + LOGI("LEAVE"); + + return 0; + } + +} +} // namespace diff --git a/mv_machine_learning/mv_inference/inference/src/BvhUtils.cpp b/mv_machine_learning/mv_inference/inference/src/BvhUtils.cpp new file mode 100644 index 00000000..ba11a910 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/BvhUtils.cpp @@ -0,0 +1,72 @@ +/** + * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BvhUtils.h" +#include + +#define DegreeToRadian(degree) ((degree) * (M_PI/180.f)) + +namespace mediavision +{ +namespace inference +{ + cv::Mat rotation_matrix(float angle, Axis axis) { + cv::Mat matrix = cv::Mat::eye(4,4,CV_32F); + + float rangle = DegreeToRadian(angle); + + // We want to unique situation when in matrix are -0.0f, so we perform + // additional checking + float sin_a = sin(rangle); + if (fabs(sin_a) < std::numeric_limits::epsilon()) + sin_a = 0.0f; + float cos_a = cos(rangle); + if (fabs(cos_a) < std::numeric_limits::epsilon()) + cos_a = 0.0f; + float msin_a = fabs(sin_a) < std::numeric_limits::epsilon() ? + 0.0f : (-1.0f) * sin_a; + + if (axis == Axis::X) { + matrix.at(1,1) = cos_a; + matrix.at(2,1) = sin_a; + matrix.at(1,2) = msin_a; + matrix.at(2,2) = cos_a; + } else if (axis == Axis::Y) { + matrix.at(0,0) = cos_a; + matrix.at(2,0) = msin_a; + matrix.at(0,2) = sin_a; + matrix.at(2,2) = cos_a; + } else { + matrix.at(0,0) = cos_a; + matrix.at(1,0) = sin_a; + matrix.at(0,1) = msin_a; + matrix.at(1,1) = cos_a; + } + + return matrix; + } + + /** Rotates matrix + * @param matrix The matrix to be rotated + * @param angle The rotation angle + * @param axis The rotation axis + * @return The rotation matrix + */ + cv::Mat rotate(cv::Mat matrix, float angle, Axis axis) { + return matrix * rotation_matrix(angle, axis); + } +} +} \ No newline at end of file diff --git a/mv_machine_learning/mv_inference/inference/src/Inference.cpp b/mv_machine_learning/mv_inference/inference/src/Inference.cpp new file mode 100755 index 00000000..aab4b815 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/Inference.cpp @@ -0,0 +1,1756 @@ +/** + * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mv_private.h" +#include "Inference.h" +#include "InferenceIni.h" +#include "ObjectDecoder.h" +#include + +#include +#include +#include +#include +#include + +#define MV_INFERENCE_OUTPUT_NUMBERS_MAX 10 +#define MV_INFERENCE_OUTPUT_NUMBERS_MIN 1 +#define MV_INFERENCE_CONFIDENCE_THRESHOLD_MAX 1.0 +#define MV_INFERENCE_CONFIDENCE_THRESHOLD_MIN 0.0 + +typedef enum { + InputAttrNoType = 0, + InputAttrFloat32 = 1, + InputAttrInt32 = 2, + InputAttrUInt8 = 3, + InputAttrInt64 = 4, + InputAttrString = 5, + InputAttrBool = 6, +} InputAttrType; + +namespace mediavision +{ +namespace inference +{ + InferenceConfig::InferenceConfig() : + mConfigFilePath(), + mWeightFilePath(), + mUserFilePath(), + mDataType(MV_INFERENCE_DATA_FLOAT32), + mBackedType(MV_INFERENCE_BACKEND_NONE), + mTargetTypes(MV_INFERENCE_TARGET_DEVICE_CPU), + mConfidenceThresHold(), + mMeanValue(), + mStdValue(), + mMaxOutputNumbers(1) + { + mTensorInfo.width = -1; + mTensorInfo.height = -1; + mTensorInfo.dim = -1; + mTensorInfo.ch = -1; + } + + Inference::Inference() : + mCanRun(), + mConfig(), + mBackendCapacity(), + mSupportedInferenceBackend(), + mInputSize(cv::Size()), + mCh(), + mDim(), + mDeviation(), + mMean(), + mThreshold(), + mOutputNumbers(), + mSourceSize(cv::Size()), + mInputBuffer(cv::Mat()), + engine_config(), + mBackend(), + mPoseResult(NULL), + mMetadata(), + mPreProc(), + mPostProc() + { + LOGI("ENTER"); + + mSupportedInferenceBackend.insert(std::make_pair( + MV_INFERENCE_BACKEND_OPENCV, std::make_pair("opencv", false))); + mSupportedInferenceBackend.insert(std::make_pair( + MV_INFERENCE_BACKEND_TFLITE, std::make_pair("tflite", false))); + mSupportedInferenceBackend.insert(std::make_pair( + MV_INFERENCE_BACKEND_ARMNN, std::make_pair("armnn", false))); + mSupportedInferenceBackend.insert(std::make_pair( + MV_INFERENCE_BACKEND_MLAPI, std::make_pair("mlapi", false))); + mSupportedInferenceBackend.insert(std::make_pair( + MV_INFERENCE_BACKEND_ONE, std::make_pair("mlapi", false))); + + CheckSupportedInferenceBackend(); + + for (auto& backend : mSupportedInferenceBackend) { + LOGI("%s: %s", backend.second.first.c_str(), + backend.second.second ? "TRUE" : "FALSE"); + } + + mModelFormats.insert(std::make_pair( + "caffemodel", INFERENCE_MODEL_CAFFE)); + mModelFormats.insert( + std::make_pair("pb", INFERENCE_MODEL_TF)); + mModelFormats.insert(std::make_pair( + "tflite", INFERENCE_MODEL_TFLITE)); + mModelFormats.insert( + std::make_pair("t7", INFERENCE_MODEL_TORCH)); + mModelFormats.insert(std::make_pair( + "weights", INFERENCE_MODEL_DARKNET)); + mModelFormats.insert( + std::make_pair("bin", INFERENCE_MODEL_DLDT)); + mModelFormats.insert( + std::make_pair("onnx", INFERENCE_MODEL_ONNX)); + mModelFormats.insert(std::make_pair( + "nb", INFERENCE_MODEL_VIVANTE)); + + LOGI("LEAVE"); + } + + Inference::~Inference() + { + CleanupTensorBuffers(); + + if (!mInputLayerProperty.layers.empty()) { + mInputLayerProperty.layers.clear(); + std::map().swap( + mInputLayerProperty.layers); + } + if (!mOutputLayerProperty.layers.empty()) { + mOutputLayerProperty.layers.clear(); + std::map().swap( + mOutputLayerProperty.layers); + } + + if (mPoseResult) { + for (int poseIndex = 0; poseIndex < mPoseResult->number_of_poses; ++poseIndex) { + delete [] mPoseResult->landmarks[poseIndex]; + } + delete [] mPoseResult->landmarks; + delete mPoseResult; + } + + mModelFormats.clear(); + + // Release backend engine. 
+ if (mBackend) { + mBackend->UnbindBackend(); + delete mBackend; + } + + LOGI("Released backend engine."); + } + + void Inference::CheckSupportedInferenceBackend() + { + LOGI("ENTER"); + + InferenceInI ini; + ini.LoadInI(); + + std::vector supportedBackend = ini.GetSupportedInferenceEngines(); + for (auto& backend : supportedBackend) { + LOGI("engine: %d", backend); + + mSupportedInferenceBackend[backend].second = true; + } + + LOGI("LEAVE"); + } + + int Inference::ConvertEngineErrorToVisionError(int error) + { + int ret = MEDIA_VISION_ERROR_NONE; + + switch (error) { + case INFERENCE_ENGINE_ERROR_NONE: + ret = MEDIA_VISION_ERROR_NONE; + break; + case INFERENCE_ENGINE_ERROR_NOT_SUPPORTED: + ret = MEDIA_VISION_ERROR_NOT_SUPPORTED; + break; + case INFERENCE_ENGINE_ERROR_MSG_TOO_LONG: + ret = MEDIA_VISION_ERROR_MSG_TOO_LONG; + break; + case INFERENCE_ENGINE_ERROR_NO_DATA: + ret = MEDIA_VISION_ERROR_NO_DATA; + break; + case INFERENCE_ENGINE_ERROR_KEY_NOT_AVAILABLE: + ret = MEDIA_VISION_ERROR_KEY_NOT_AVAILABLE; + break; + case INFERENCE_ENGINE_ERROR_OUT_OF_MEMORY: + ret = MEDIA_VISION_ERROR_OUT_OF_MEMORY; + break; + case INFERENCE_ENGINE_ERROR_INVALID_PARAMETER: + ret = MEDIA_VISION_ERROR_INVALID_PARAMETER; + break; + case INFERENCE_ENGINE_ERROR_INVALID_OPERATION: + ret = MEDIA_VISION_ERROR_INVALID_OPERATION; + break; + case INFERENCE_ENGINE_ERROR_PERMISSION_DENIED: + ret = MEDIA_VISION_ERROR_PERMISSION_DENIED; + break; + case INFERENCE_ENGINE_ERROR_NOT_SUPPORTED_FORMAT: + ret = MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT; + break; + case INFERENCE_ENGINE_ERROR_INTERNAL: + ret = MEDIA_VISION_ERROR_INTERNAL; + break; + case INFERENCE_ENGINE_ERROR_INVALID_DATA: + ret = MEDIA_VISION_ERROR_INVALID_DATA; + break; + case INFERENCE_ENGINE_ERROR_INVALID_PATH: + ret = MEDIA_VISION_ERROR_INVALID_PATH; + break; + default: + LOGE("Unknown inference engine error type"); + } + + return ret; + } + + int Inference::ConvertTargetTypes(int given_types) + { + int target_types = INFERENCE_TARGET_NONE; + + if (given_types & MV_INFERENCE_TARGET_DEVICE_CPU) + target_types |= INFERENCE_TARGET_CPU; + if (given_types & MV_INFERENCE_TARGET_DEVICE_GPU) + target_types |= INFERENCE_TARGET_GPU; + if (given_types & MV_INFERENCE_TARGET_DEVICE_CUSTOM) + target_types |= INFERENCE_TARGET_CUSTOM; + + return target_types; + } + + int Inference::ConvertToCv(int given_type) + { + int type = 0; + + switch (given_type) { + case INFERENCE_TENSOR_DATA_TYPE_UINT8: + LOGI("Type is %d ch with UINT8", mCh); + type = mCh == 1 ? CV_8UC1 : CV_8UC3; + break; + case INFERENCE_TENSOR_DATA_TYPE_FLOAT32: + LOGI("Type is %d ch with FLOAT32", mCh); + type = mCh == 1 ? CV_32FC1 : CV_32FC3; + break; + default: + LOGI("unknown data type so FLOAT32 data type will be used in default"); + type = mCh == 1 ? 
CV_32FC1 : CV_32FC3; + break; + } + + return type; + } + + inference_tensor_data_type_e Inference::ConvertToIE(int given_type) + { + inference_tensor_data_type_e type = INFERENCE_TENSOR_DATA_TYPE_FLOAT32; + + switch (given_type) { + case MV_INFERENCE_DATA_FLOAT32: + type = INFERENCE_TENSOR_DATA_TYPE_FLOAT32; + break; + case MV_INFERENCE_DATA_UINT8: + type = INFERENCE_TENSOR_DATA_TYPE_UINT8; + break; + default: + LOGI("unknown data type so FLOAT32 data type will be used in default"); + break; + } + + return type; + } + + int Inference::Preprocess(cv::Mat cvImg, cv::Mat cvDst, int data_type) + { + mSourceSize = cvImg.size(); + int width = mInputSize.width; + int height = mInputSize.height; + + cv::Mat sample; + if (cvImg.channels() == 3 && mCh == 1) + cv::cvtColor(cvImg, sample, cv::COLOR_BGR2GRAY); + else + sample = cvImg; + + // size + cv::Mat sampleResized; + if (sample.size() != cv::Size(width, height)) + cv::resize(sample, sampleResized, cv::Size(width, height)); + else + sampleResized = sample; + + // type + cv::Mat sampleFloat; + if (mCh == 3) + sampleResized.convertTo(sampleFloat, CV_32FC3); + else + sampleResized.convertTo(sampleFloat, CV_32FC1); + + // normalize + cv::Mat sampleNormalized; + cv::Mat meanMat; + if (mCh == 3) + meanMat = cv::Mat(sampleFloat.size(), CV_32FC3, + cv::Scalar((float) mMean, (float) mMean, + (float) mMean)); + else + meanMat = cv::Mat(sampleFloat.size(), CV_32FC1, + cv::Scalar((float) mMean)); + + cv::subtract(sampleFloat, meanMat, sampleNormalized); + + sampleNormalized /= static_cast(mDeviation); + + sampleNormalized.convertTo(cvDst, data_type); + + return MEDIA_VISION_ERROR_NONE; + } + + int Inference::SetUserFile(std::string filename) + { + std::ifstream fp(filename.c_str()); + if (!fp.is_open()) { + return MEDIA_VISION_ERROR_INVALID_PATH; + } + + std::string userListName; + while (!fp.eof()) { + std::getline(fp, userListName); + if (userListName.length()) + mUserListName.push_back(userListName); + } + + fp.close(); + + return MEDIA_VISION_ERROR_NONE; + } + + void Inference::ConfigureModelFiles(const std::string modelConfigFilePath, + const std::string modelWeightFilePath, + const std::string modelUserFilePath) + { + LOGI("ENTER"); + + mConfig.mConfigFilePath = modelConfigFilePath; + mConfig.mWeightFilePath = modelWeightFilePath; + mConfig.mUserFilePath = modelUserFilePath; + + LOGI("LEAVE"); + } + + void Inference::ConfigureTensorInfo(int width, int height, int dim, int ch, + double stdValue, double meanValue) + { + LOGI("ENTER"); + + mConfig.mTensorInfo = { width, height, dim, ch }; + mConfig.mStdValue = stdValue; + mConfig.mMeanValue = meanValue; + + LOGI("LEAVE"); + } + + void Inference::ConfigureInputInfo(int width, int height, int dim, int ch, + double stdValue, double meanValue, + int dataType, + const std::vector names) + { + LOGI("ENTER"); + + // FIXME: mConfig should be removed + mConfig.mTensorInfo = { width, height, dim, ch }; + mConfig.mStdValue = stdValue; + mConfig.mMeanValue = meanValue; + mConfig.mDataType = static_cast(dataType); + mConfig.mInputLayerNames = names; + + const InputMetadata& inputMeta = mMetadata.GetInputMeta(); + if (inputMeta.parsed) { + LOGI("use input meta"); + auto& layerInfo = inputMeta.layer.begin()->second; + if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { // NCHW + mConfig.mTensorInfo.ch = layerInfo.dims[1]; + mConfig.mTensorInfo.dim = layerInfo.dims[0]; + mConfig.mTensorInfo.width = layerInfo.dims[3]; + mConfig.mTensorInfo.height = layerInfo.dims[2]; + } else if (layerInfo.shapeType == 
INFERENCE_TENSOR_SHAPE_NHWC) {// NHWC + mConfig.mTensorInfo.ch = layerInfo.dims[3]; + mConfig.mTensorInfo.dim = layerInfo.dims[0]; + mConfig.mTensorInfo.width = layerInfo.dims[2]; + mConfig.mTensorInfo.height = layerInfo.dims[1]; + } else { + LOGE("Invalid shape type[%d]", layerInfo.shapeType); + } + + if (!inputMeta.option.empty()) { + auto& option = inputMeta.option.begin()->second; + if (option.normalization.use) { + mConfig.mMeanValue = option.normalization.mean[0]; + mConfig.mStdValue = option.normalization.std[0]; + } + } + + mConfig.mDataType = layerInfo.dataType; + mConfig.mInputLayerNames.clear(); + for (auto& layer : inputMeta.layer) { + mConfig.mInputLayerNames.push_back(layer.first); + } + } + + inference_engine_layer_property property; + // In case of that a inference plugin deosn't support to get properties, + // the tensor info given by a user will be used. + // If the plugin supports that, the given info will be ignored. + + for (auto& name : mConfig.mInputLayerNames) { + inference_engine_tensor_info tensor_info; + tensor_info.data_type = ConvertToIE(dataType); + + // In case of OpenCV, only supports NCHW + tensor_info.shape_type = INFERENCE_TENSOR_SHAPE_NCHW; + // modify to handle multiple tensor infos + tensor_info.shape.push_back(mConfig.mTensorInfo.dim); + tensor_info.shape.push_back(mConfig.mTensorInfo.ch); + tensor_info.shape.push_back(mConfig.mTensorInfo.height); + tensor_info.shape.push_back(mConfig.mTensorInfo.width); + + tensor_info.size = 1; + for (auto& dim : tensor_info.shape) { + tensor_info.size *= dim; + } + + property.layers.insert(std::make_pair(name, tensor_info)); + } + + int ret = mBackend->SetInputLayerProperty(property); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + LOGE("Fail to set input layer property"); + } + + LOGI("LEAVE"); + } + + void Inference::ConfigureOutputInfo(const std::vector names) + { + LOGI("ENTER"); + + mConfig.mOutputLayerNames = names; + + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + if (outputMeta.IsParsed()) { + mConfig.mOutputLayerNames.clear(); + if (!outputMeta.GetScore().GetName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetScore().GetName()); + + if (!outputMeta.GetBox().GetName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetBox().GetName()); + + if (!outputMeta.GetLabel().GetName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetLabel().GetName()); + + if (!outputMeta.GetNumber().GetName().empty()) + mConfig.mOutputLayerNames.push_back(outputMeta.GetNumber().GetName()); + } + + inference_engine_layer_property property; + + inference_engine_tensor_info tensor_info = { std::vector{1}, + INFERENCE_TENSOR_SHAPE_NCHW, + INFERENCE_TENSOR_DATA_TYPE_FLOAT32, + 1}; + for (auto& name : mConfig.mOutputLayerNames) { + property.layers.insert(std::make_pair(name, tensor_info)); + } + + int ret = mBackend->SetOutputLayerProperty(property); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + LOGE("Fail to set output layer property"); + } + + LOGI("LEAVE"); + } + + int Inference::ConfigureBackendType( + const mv_inference_backend_type_e backendType) + { + // Check if a given backend type is valid or not. 
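+		// Illustrative note (not part of the original change): e.g.
+		// MV_INFERENCE_BACKEND_TFLITE passes this range check, but it is
+		// only usable when the ini file also marked it as supported (see
+		// CheckSupportedInferenceBackend()), which the lookup below verifies.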
+ if (backendType <= MV_INFERENCE_BACKEND_NONE || + backendType >= MV_INFERENCE_BACKEND_MAX) { + LOGE("Invalid backend type."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + std::pair backend = + mSupportedInferenceBackend[backendType]; + if (backend.second == false) { + LOGE("%s type is not supported", (backend.first).c_str()); + return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT; + } + + LOGI("backend engine : %d", backendType); + + mConfig.mBackedType = backendType; + + return MEDIA_VISION_ERROR_NONE; + } + + int Inference::ConfigureTargetTypes(const int targetType) + { + // Check if given target types are valid or not. + if (MV_INFERENCE_TARGET_NONE >= targetType || + MV_INFERENCE_TARGET_MAX <= targetType) { + LOGE("Invalid target device."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + LOGI("Before converting target types : %d", targetType); + + unsigned int new_type = MV_INFERENCE_TARGET_DEVICE_NONE; + + // Convert old type to new one. + switch (targetType) { + case MV_INFERENCE_TARGET_CPU: + if (mBackendCapacity.supported_accel_devices != INFERENCE_TARGET_CPU) { + LOGE("Backend doesn't support CPU acceleration."); + return MEDIA_VISION_ERROR_NOT_SUPPORTED; + } + new_type = MV_INFERENCE_TARGET_DEVICE_CPU; + break; + case MV_INFERENCE_TARGET_GPU: + if (mBackendCapacity.supported_accel_devices != INFERENCE_TARGET_GPU) { + LOGE("Backend doesn't support GPU acceleration."); + return MEDIA_VISION_ERROR_NOT_SUPPORTED; + } + new_type = MV_INFERENCE_TARGET_DEVICE_GPU; + break; + case MV_INFERENCE_TARGET_CUSTOM: + if (mBackendCapacity.supported_accel_devices != INFERENCE_TARGET_CUSTOM) { + LOGE("Backend doesn't support custom device acceleration."); + return MEDIA_VISION_ERROR_NOT_SUPPORTED; + } + new_type = MV_INFERENCE_TARGET_DEVICE_CUSTOM; + break; + } + + LOGI("After converting target types : %d", new_type); + + mConfig.mTargetTypes = new_type; + + return MEDIA_VISION_ERROR_NONE; + } + + int Inference::ConfigureTargetDevices(const int targetDevices) + { + // Check if given target types are valid or not. 
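+		// Illustrative note (not part of the original change): targetDevices
+		// is a bitmask, so MV_INFERENCE_TARGET_DEVICE_CPU |
+		// MV_INFERENCE_TARGET_DEVICE_GPU requests both devices at once; the
+		// capacity check below then requires the backend to advertise every
+		// requested bit.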
+ if (MV_INFERENCE_TARGET_DEVICE_NONE >= targetDevices || + MV_INFERENCE_TARGET_DEVICE_MAX <= targetDevices) { + LOGE("Invalid target device."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + LOGI("target devices : %d", targetDevices); + + if (!(mBackendCapacity.supported_accel_devices & targetDevices)) { + LOGE("Backend doesn't support a given device acceleration."); + return MEDIA_VISION_ERROR_NOT_SUPPORTED; + } + + mConfig.mTargetTypes = targetDevices; + + return MEDIA_VISION_ERROR_NONE; + } + + void Inference::ConfigureOutput(const int maxOutputNumbers) + { + mConfig.mMaxOutputNumbers = std::max( + std::min(maxOutputNumbers, MV_INFERENCE_OUTPUT_NUMBERS_MAX), + MV_INFERENCE_OUTPUT_NUMBERS_MIN); + } + + void Inference::ConfigureThreshold(const double threshold) + { + mConfig.mConfidenceThresHold = std::max( + std::min(threshold, MV_INFERENCE_CONFIDENCE_THRESHOLD_MAX), + MV_INFERENCE_CONFIDENCE_THRESHOLD_MIN); + } + + int Inference::ParseMetadata(const std::string filePath) + { + LOGI("ENTER"); + int ret = mMetadata.Init(filePath); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to init metadata[%d]", ret); + return ret; + } + + ret = mMetadata.Parse(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to parse metadata[%d]", ret); + return ret; + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + void Inference::CleanupTensorBuffers(void) + { + LOGI("ENTER"); + + if (!mInputTensorBuffers.empty()) { + mInputTensorBuffers.clear(); + } + + if (!mOutputTensorBuffers.empty()) { + mOutputTensorBuffers.clear(); + } + + LOGI("LEAVE"); + } + + int Inference::PrepareTenosrBuffers(void) + { + // If there are input and output tensor buffers allocated before then release the buffers. + // They will be allocated again according to a new model file to be loaded. + CleanupTensorBuffers(); + + // IF model file is loaded again then the model type could be different so + // clean up input and output layer properties so that they can be updated again + // after reloading the model file. + if (!mInputLayerProperty.layers.empty()) { + mInputLayerProperty.layers.clear(); + std::map().swap( + mInputLayerProperty.layers); + } + if (!mOutputLayerProperty.layers.empty()) { + mOutputLayerProperty.layers.clear(); + std::map().swap( + mOutputLayerProperty.layers); + } + + // Get input tensor buffers from a backend engine if the backend engine allocated. + auto& inputTensorBuffers = mInputTensorBuffers.getAllTensorBuffer(); + int ret = mBackend->GetInputTensorBuffers(inputTensorBuffers); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + LOGE("Fail to get input tensor buffers from backend engine."); + return ConvertEngineErrorToVisionError(ret); + } + + ret = mBackend->GetInputLayerProperty(mInputLayerProperty); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + LOGE("Fail to get input layer property from backend engine."); + return ConvertEngineErrorToVisionError(ret); + } + + // If the backend engine isn't able to allocate input tensor buffers internally, + // then allocate the buffers at here. 
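+		// Sizing sketch (illustrative, assuming tensor_info.size holds the
+		// element count as computed in ConfigureInputInfo()): a FLOAT32 input
+		// of shape {1, 3, 224, 224} gives size = 1 * 3 * 224 * 224 = 150528,
+		// so `new float[tensor_info.size]` below allocates 150528 floats
+		// (602112 bytes).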
+ if (mInputTensorBuffers.empty()) { + for(auto& layer : mInputLayerProperty.layers) { + const inference_engine_tensor_info& tensor_info = layer.second; + inference_engine_tensor_buffer tensor_buffer; + if (tensor_info.data_type == + INFERENCE_TENSOR_DATA_TYPE_FLOAT32) { + tensor_buffer.buffer = new float[tensor_info.size]; + tensor_buffer.size = tensor_info.size; + } else if (tensor_info.data_type == + INFERENCE_TENSOR_DATA_TYPE_UINT8) { + tensor_buffer.buffer = new unsigned char[tensor_info.size]; + tensor_buffer.size = tensor_info.size; + } else if (tensor_info.data_type == + INFERENCE_TENSOR_DATA_TYPE_UINT16) { + tensor_buffer.buffer = new unsigned short[tensor_info.size]; + tensor_buffer.size = tensor_info.size; + } else { + LOGE("Invalid input tensor data type."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + if (tensor_buffer.buffer == NULL) { + LOGE("Fail to allocate input tensor buffer."); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; + } + + LOGI("Allocated input tensor buffer(size = %zu, data type = %d)", + tensor_info.size, tensor_info.data_type); + tensor_buffer.owner_is_backend = 0; + tensor_buffer.data_type = tensor_info.data_type; + mInputTensorBuffers.setTensorBuffer(layer.first, tensor_buffer); + } + } + + LOGI("Input tensor buffer count is %zu", mInputTensorBuffers.size()); + + // Get output tensor buffers from a backend engine if the backend engine allocated. + auto& outputTensorBuffers = mOutputTensorBuffers.getAllTensorBuffer(); + ret = mBackend->GetOutputTensorBuffers(outputTensorBuffers); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + LOGE("Fail to get output tensor buffers from backend engine."); + return ConvertEngineErrorToVisionError(ret); + } + + ret = mBackend->GetOutputLayerProperty(mOutputLayerProperty); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + LOGE("Fail to get output layer property from backend engine."); + return ConvertEngineErrorToVisionError(ret); + } + + // If the backend engine isn't able to allocate output tensor buffers internally, + // then allocate the buffers at here. 
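+		// Ownership note (illustrative): owner_is_backend is set to 0 for
+		// every buffer allocated below, which is what later allows
+		// FillOutputResult() to delete[] the original buffer when it swaps a
+		// quantized output for its dequantized float copy.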
+ if (mOutputTensorBuffers.empty()) { + for (auto& layer : mOutputLayerProperty.layers) { + const inference_engine_tensor_info& tensor_info = layer.second; + inference_engine_tensor_buffer tensor_buffer; + if (tensor_info.data_type == + INFERENCE_TENSOR_DATA_TYPE_FLOAT32) { + tensor_buffer.buffer = new float[tensor_info.size]; + tensor_buffer.size = tensor_info.size; + } else if (tensor_info.data_type == + INFERENCE_TENSOR_DATA_TYPE_INT64) { + tensor_buffer.buffer = new long long[tensor_info.size]; + tensor_buffer.size = tensor_info.size; + } else if (tensor_info.data_type == + INFERENCE_TENSOR_DATA_TYPE_UINT32) { + tensor_buffer.buffer = new unsigned int[tensor_info.size]; + tensor_buffer.size = tensor_info.size; + } else if (tensor_info.data_type == + INFERENCE_TENSOR_DATA_TYPE_UINT8) { + tensor_buffer.buffer = new char[tensor_info.size]; + tensor_buffer.size = tensor_info.size; + } else if (tensor_info.data_type == + INFERENCE_TENSOR_DATA_TYPE_UINT16) { + tensor_buffer.buffer = new unsigned short[tensor_info.size]; + tensor_buffer.size = tensor_info.size; + } else { + LOGE("Invalid output tensor data type."); + CleanupTensorBuffers(); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + if (tensor_buffer.buffer == NULL) { + LOGE("Fail to allocate output tensor buffer."); + CleanupTensorBuffers(); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; + } + + LOGI("Allocated output tensor buffer(size = %zu, data type = %d)", + tensor_info.size, tensor_info.data_type); + + tensor_buffer.owner_is_backend = 0; + tensor_buffer.data_type = tensor_info.data_type; + mOutputTensorBuffers.setTensorBuffer(layer.first, tensor_buffer); + } + } + + LOGI("Output tensor buffer count is %zu", mOutputTensorBuffers.size()); + + return MEDIA_VISION_ERROR_NONE; + } + + int Inference::FillOutputResult(tensor_t &outputData) + { + for (auto& layer : mOutputLayerProperty.layers) { + const inference_engine_tensor_info& tensor_info = layer.second; + + std::vector tmpDimInfo; + for (auto& dim : tensor_info.shape) { + tmpDimInfo.push_back(dim); + } + + outputData.dimInfo.push_back(tmpDimInfo); + + inference_engine_tensor_buffer* tensorBuffers = + mOutputTensorBuffers.getTensorBuffer(layer.first); + if (tensorBuffers == NULL) { + LOGE("Fail to getTensorBuffer with name %s", layer.first.c_str()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + // Normalize output tensor data converting it to float type in case of quantized model. + if (tensor_info.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT8) { + float *new_buf = new float[tensor_info.size]; + if (new_buf == NULL) { + LOGE("Fail to allocate a new output tensor buffer."); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; + } + + auto *ori_buf = static_cast( + tensorBuffers->buffer); + + for (int j = 0; j < tensor_info.size; j++) { + new_buf[j] = static_cast(ori_buf[j]) / 255.0f; + } + + // replace original buffer with new one, and release origin one. + tensorBuffers->buffer = new_buf; + + if (!tensorBuffers->owner_is_backend) + delete[] ori_buf; + } + + if (tensor_info.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT16) { + float *new_buf = new float[tensor_info.size]; + if (new_buf == NULL) { + LOGE("Fail to allocate a new output tensor buffer."); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; + } + + auto *ori_buf = + static_cast(tensorBuffers->buffer); + + for (int j = 0; j < tensor_info.size; j++) { + new_buf[j] = static_cast(ori_buf[j]); + } + + // replace original buffer with new one, and release origin one. 
+ tensorBuffers->buffer = new_buf; + + if (!tensorBuffers->owner_is_backend) + delete[] ori_buf; + } + + outputData.data.push_back(static_cast(tensorBuffers->buffer)); + } + + return MEDIA_VISION_ERROR_NONE; + } + + int Inference::Bind(void) + { + LOGI("ENTER"); + + if (mConfig.mBackedType <= MV_INFERENCE_BACKEND_NONE || + mConfig.mBackedType >= MV_INFERENCE_BACKEND_MAX) { + LOGE("NOT SUPPORTED BACKEND %d", mConfig.mBackedType); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + std::string backendName = mSupportedInferenceBackend[mConfig.mBackedType].first; + LOGI("backend string name: %s", backendName.c_str()); + + inference_engine_config config = { + .backend_name = backendName, + .backend_type = mConfig.mBackedType, + // As a default, Target device is CPU. If user defined desired device type in json file + // then the device type will be set by Load callback. + .target_devices = mConfig.mTargetTypes, + }; + + // Create a backend class object. + try { + mBackend = new InferenceEngineCommon(); + } catch (const std::bad_alloc &ex) { + LOGE("Fail to create backend : %s", ex.what()); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; + } + + int ret = MEDIA_VISION_ERROR_NONE; + + // Load configuration file if a given backend type is mlapi. + if (config.backend_type == MV_INFERENCE_BACKEND_MLAPI) { + ret = mBackend->LoadConfigFile(); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + } + + // Bind a backend library. + ret = mBackend->BindBackend(&config); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + LOGE("Fail to bind backend library.(%d)", mConfig.mBackedType); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + // Get capacity information from a backend. + ret = mBackend->GetBackendCapacity(&mBackendCapacity); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get backend capacity."); + return ret; + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int Inference::Prepare(void) + { + LOGI("ENTER"); + + mCh = mConfig.mTensorInfo.ch; + mDim = mConfig.mTensorInfo.dim; + mInputSize = + cv::Size(mConfig.mTensorInfo.width, mConfig.mTensorInfo.height); + LOGI("InputSize is %d x %d\n", mInputSize.width, mInputSize.height); + + mDeviation = mConfig.mStdValue; + mMean = mConfig.mMeanValue; + LOGI("mean %.4f, deviation %.4f", mMean, mDeviation); + + mOutputNumbers = mConfig.mMaxOutputNumbers; + LOGI("outputNumber %d", mOutputNumbers); + + mThreshold = mConfig.mConfidenceThresHold; + LOGI("threshold %.4f", mThreshold); + + // Check if backend supports a given target device/devices or not. 
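+		// For example (illustrative): if the backend capacity only advertises
+		// INFERENCE_TARGET_CPU but mConfig.mTargetTypes carries the GPU bit,
+		// (supported_accel_devices & INFERENCE_TARGET_GPU) == 0 holds and
+		// Prepare() fails with MEDIA_VISION_ERROR_INVALID_PARAMETER.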
+ if (mConfig.mTargetTypes & MV_INFERENCE_TARGET_DEVICE_CPU) { + if (!(mBackendCapacity.supported_accel_devices & + INFERENCE_TARGET_CPU)) { + LOGE("Backend doesn't support CPU device as an accelerator."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + } + + if (mConfig.mTargetTypes & MV_INFERENCE_TARGET_DEVICE_GPU) { + if (!(mBackendCapacity.supported_accel_devices & + INFERENCE_TARGET_GPU)) { + LOGE("Backend doesn't support CPU device as an accelerator."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + } + + if (mConfig.mTargetTypes & MV_INFERENCE_TARGET_DEVICE_CUSTOM) { + if (!(mBackendCapacity.supported_accel_devices & + INFERENCE_TARGET_CUSTOM)) { + LOGE("Backend doesn't support CPU device as an accelerator."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + } + + mBackend->SetTargetDevices(ConvertTargetTypes(mConfig.mTargetTypes)); + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int Inference::Load(void) + { + LOGI("ENTER"); + + std::string label_file = mConfig.mUserFilePath; + size_t userFileLength = label_file.length(); + if (userFileLength > 0 && access(label_file.c_str(), F_OK)) { + LOGE("Label file path in [%s] ", label_file.c_str()); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + int ret = (userFileLength > 0) ? SetUserFile(label_file) : + MEDIA_VISION_ERROR_NONE; + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to load label file."); + return ret; + } + + // Check if model file is valid or not. + std::string ext_str = mConfig.mWeightFilePath.substr( + mConfig.mWeightFilePath.find_last_of(".") + 1); + std::map::iterator key = mModelFormats.find(ext_str); + if (key == mModelFormats.end()) { + LOGE("Invalid model file format.(ext = %s)", ext_str.c_str()); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + LOGI("%s model file has been detected.", ext_str.c_str()); + + std::vector models; + + inference_model_format_e model_format = + static_cast(key->second); + + // Push model file information to models vector properly according to detected model format. + switch (model_format) { + case INFERENCE_MODEL_CAFFE: + case INFERENCE_MODEL_TF: + case INFERENCE_MODEL_DARKNET: + case INFERENCE_MODEL_DLDT: + case INFERENCE_MODEL_ONNX: + case INFERENCE_MODEL_VIVANTE: + models.push_back(mConfig.mWeightFilePath); + models.push_back(mConfig.mConfigFilePath); + break; + case INFERENCE_MODEL_TFLITE: + case INFERENCE_MODEL_TORCH: + models.push_back(mConfig.mWeightFilePath); + break; + default: + break; + } + + // Request model loading to backend engine. + ret = mBackend->Load(models, model_format); + if (ret != INFERENCE_ENGINE_ERROR_NONE) { + delete mBackend; + LOGE("Fail to load model"); + mCanRun = false; + std::vector().swap(models); + return ConvertEngineErrorToVisionError(ret); + } + + std::vector().swap(models); + + // Prepare input and output tensor buffers. 
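+		// Recap of the format handling above (file names are illustrative):
+		// a "model.tflite" weight file is loaded on its own, while a
+		// "model.caffemodel" weight file is pushed together with its config
+		// file (e.g. a .prototxt), matching the switch over
+		// inference_model_format_e. The tensor buffers for the loaded model
+		// are prepared next.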
+ PrepareTenosrBuffers(); + + mCanRun = true; + + LOGI("LEAVE"); + + return ConvertEngineErrorToVisionError(ret); + } + + int Inference::Run(std::vector &mvSources, + std::vector &rects) + { + int ret = INFERENCE_ENGINE_ERROR_NONE; + + if (!mCanRun) { + LOGE("Invalid to run inference"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + /* convert mv_source to cv::Mat */ + cv::Mat cvSource; + cv::Rect cvRoi; + unsigned int width = 0, height = 0; + unsigned int bufferSize = 0; + unsigned char *buffer = NULL; + + if (mvSources.empty()) { + LOGE("mvSources should contain only one cv source."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + // We are able to request Only one input data for the inference as of now. + if (mvSources.size() > 1) { + LOGE("It allows only one mv source for the inference."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + // TODO. Consider multiple sources. + mv_source_h mvSource = mvSources.front(); + mv_rectangle_s *roi = rects.empty() ? NULL : &(rects.front()); + + mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID; + + if (mv_source_get_width(mvSource, &width) != MEDIA_VISION_ERROR_NONE || + mv_source_get_height(mvSource, &height) != + MEDIA_VISION_ERROR_NONE || + mv_source_get_colorspace(mvSource, &colorspace) != + MEDIA_VISION_ERROR_NONE || + mv_source_get_buffer(mvSource, &buffer, &bufferSize)) + return MEDIA_VISION_ERROR_INTERNAL; + + // TODO. Let's support various color spaces. + + if (colorspace != MEDIA_VISION_COLORSPACE_RGB888) { + LOGE("Not Supported format!\n"); + return MEDIA_VISION_ERROR_NOT_SUPPORTED_FORMAT; + } + + if (roi == NULL) { + cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), + buffer) + .clone(); + } else { + cvRoi.x = roi->point.x; + cvRoi.y = roi->point.y; + cvRoi.width = (roi->point.x + roi->width) >= width ? + width - roi->point.x : + roi->width; + cvRoi.height = (roi->point.y + roi->height) >= height ? + height - roi->point.y : + roi->height; + cvSource = cv::Mat(cv::Size(width, height), CV_MAKETYPE(CV_8U, 3), + buffer)(cvRoi) + .clone(); + } + + LOGI("Size: w:%u, h:%u", cvSource.size().width, cvSource.size().height); + + if (mCh != 1 && mCh != 3) { + LOGE("Channel not supported."); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + const InputMetadata& inputMeta = mMetadata.GetInputMeta(); + if (inputMeta.parsed) { + for (auto& buffer : mInputTensorBuffers.getAllTensorBuffer()) { + inference_engine_tensor_buffer& tensor_buffer = buffer.second; + const LayerInfo& layerInfo = inputMeta.layer.at(buffer.first); + const Options& opt = inputMeta.option.empty() ? Options() : inputMeta.option.at(buffer.first); + + int data_type = ConvertToCv(tensor_buffer.data_type); + + // mSourceSize is original input image's size + mSourceSize = cvSource.size(); + // TODO: try-catch{} error handling + ret = mPreProc.Run(cvSource, colorspace, data_type, layerInfo, opt, tensor_buffer.buffer); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to run pre-process."); + return ret; + } + } + } else { + for (auto& buffer : mInputTensorBuffers.getAllTensorBuffer()) { + inference_engine_tensor_buffer& tensor_buffer = buffer.second; + + int data_type = ConvertToCv(tensor_buffer.data_type); + + // Convert color space of input tensor data and then normalize it. 
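+				// Normalization done by Preprocess() below, written out for
+				// clarity: dst = (src - mMean) / mDeviation. E.g. with the
+				// (illustrative) values mean = 127.5 and std = 127.5, an
+				// 8-bit pixel value 255 maps to (255 - 127.5) / 127.5 = 1.0.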
+ + ret = Preprocess(cvSource, + cv::Mat(mInputSize.height, mInputSize.width, + data_type, tensor_buffer.buffer), + data_type); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to preprocess input tensor data."); + return ret; + } + } + } + + ret = mBackend->Run(mInputTensorBuffers.getAllTensorBuffer(), + mOutputTensorBuffers.getAllTensorBuffer()); + + return ConvertEngineErrorToVisionError(ret); + } + + std::pair + Inference::GetSupportedInferenceBackend(int backend) + { + return mSupportedInferenceBackend[backend]; + } + + int Inference::GetClassficationResults( + ImageClassificationResults *classificationResults) + { + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + if (outputMeta.IsParsed()) { + std::vector> topScore; + float value = 0.0f; + auto& info = outputMeta.GetScore(); + + std::vector indexes = info.GetDimInfo().GetValidIndexAll(); + if (indexes.size() != 1) { + LOGE("Invalid dim size. It should be 1"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + int classes = mOutputLayerProperty.layers[info.GetName()].shape[indexes[0]]; + + if (!mOutputTensorBuffers.exist(info.GetName())) { + LOGE("output buffe is NULL"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + mPostProc.ScoreClear(info.GetTopNumber()); + for (int cId = 0; cId < classes; ++cId) { + try { + value = mOutputTensorBuffers.getValue(info.GetName(), cId); + } catch (const std::exception& e) { + LOGE(" Fail to get getValue with %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + if (info.GetDeQuant()) { + value = PostProcess::dequant(value, + info.GetDeQuant()->GetScale(), + info.GetDeQuant()->GetZeroPoint()); + } + if (info.GetType() == 1) { + value = PostProcess::sigmoid(value); + } + + if (value < info.GetThresHold()) + continue; + + LOGI("id[%d]: %.3f", cId, value); + mPostProc.ScorePush(value, cId); + } + mPostProc.ScorePop(topScore); + + ImageClassificationResults results; + results.number_of_classes = 0; + for (auto& value : topScore) { + LOGI("score: %.3f, threshold: %.3f", value.first, info.GetThresHold()); + LOGI("idx:%d", value.second); + LOGI("classProb: %.3f", value.first); + + results.indices.push_back(value.second); + results.confidences.push_back(value.first); + results.names.push_back(mUserListName[value.second]); + results.number_of_classes++; + } + + *classificationResults = results; + LOGE("Inference: GetClassificationResults: %d\n", + results.number_of_classes); + + } else { + tensor_t outputData; + + // Get inference result and contain it to outputData. + int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; + } + + // Will contain top N results in ascending order. + std::vector > top_results; + std::priority_queue, + std::vector >, + std::greater > > + top_result_pq; + float value = 0.0f; + + std::vector > inferDimInfo(outputData.dimInfo); + std::vector inferResults(outputData.data.begin(), + outputData.data.end()); + + int count = inferDimInfo[0][1]; + LOGI("count: %d", count); + float *prediction = reinterpret_cast(inferResults[0]); + for (int i = 0; i < count; ++i) { + value = prediction[i]; + + // Only add it if it beats the threshold and has a chance at being in + // the top N. + top_result_pq.push(std::pair(value, i)); + + // If at capacity, kick the smallest value out. + if (top_result_pq.size() > mOutputNumbers) { + top_result_pq.pop(); + } + } + + // Copy to output vector and reverse into descending order. 
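+			// Top-N walk-through (illustrative scores): the priority_queue
+			// with std::greater keeps the smallest score on top, so pushing
+			// {0.9, 0.1, 0.7, 0.8} with mOutputNumbers = 3 pops 0.1; draining
+			// the heap yields ascending {0.7, 0.8, 0.9}, and the reverse
+			// below produces the final descending {0.9, 0.8, 0.7}.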
+ while (!top_result_pq.empty()) { + top_results.push_back(top_result_pq.top()); + top_result_pq.pop(); + } + std::reverse(top_results.begin(), top_results.end()); + + int classIdx = -1; + ImageClassificationResults results; + results.number_of_classes = 0; + for (int idx = 0; idx < top_results.size(); ++idx) { + if (top_results[idx].first < mThreshold) + continue; + LOGI("idx:%d", idx); + LOGI("classIdx: %d", top_results[idx].second); + LOGI("classProb: %f", top_results[idx].first); + + classIdx = top_results[idx].second; + results.indices.push_back(classIdx); + results.confidences.push_back(top_results[idx].first); + results.names.push_back(mUserListName[classIdx]); + results.number_of_classes++; + } + + *classificationResults = results; + LOGE("Inference: GetClassificationResults: %d\n", + results.number_of_classes); + } + + return MEDIA_VISION_ERROR_NONE; + } + + int Inference::GetObjectDetectionResults( + ObjectDetectionResults *detectionResults) + { + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + if (outputMeta.IsParsed()) { + // decoding type + auto& boxInfo = outputMeta.GetBox(); + auto& scoreInfo = outputMeta.GetScore(); + if (!mOutputTensorBuffers.exist(boxInfo.GetName()) || + !mOutputTensorBuffers.exist(scoreInfo.GetName()) ){ + LOGE("output buffers named of %s or %s are NULL", + boxInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + int boxOffset = 0; + int numberOfObjects = 0; + if (boxInfo.GetDecodingType() == 0) { + std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + if (boxIndexes.size() != 1) { + LOGE("Invalid dim size. It should be 1"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + } else { + std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + if (boxIndexes.size() != 1) { + LOGE("Invalid dim size. It should be 1"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + + std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); + if (scoreIndexes.size() != 1) { + LOGE("Invalid dim size. It should be 1"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + numberOfObjects = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]]; + } + + ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset, + static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()), + static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()), + numberOfObjects); + + objDecoder.init(); + objDecoder.decode(); + ObjectDetectionResults results; + results.number_of_objects = 0; + + for (auto& box : objDecoder.getObjectAll()) { + results.indices.push_back(box.index); + results.names.push_back(mUserListName[box.index]); + results.confidences.push_back(box.score); + results.locations.push_back(cv::Rect( + static_cast((box.location.x - box.location.width * 0.5f) * static_cast(mSourceSize.width)), + static_cast((box.location.y - box.location.height * 0.5f) * static_cast(mSourceSize.height)), + static_cast(box.location.width * static_cast(mSourceSize.width)), + static_cast(box.location.height * static_cast(mSourceSize.height)))); + results.number_of_objects++; + } + *detectionResults = results; + LOGI("Inference: GetObjectDetectionResults: %d\n", + results.number_of_objects); + } else { + tensor_t outputData; + + // Get inference result and contain it to outputData. 
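+			// Note (illustrative): FillOutputResult() below copies each
+			// output layer's dims and a float view of its buffer into
+			// outputData; quantized outputs were already converted there,
+			// e.g. a UINT8 value 128 becomes 128 / 255.0f ~= 0.502f.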
+ int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; + } + + // In case of object detection, + // a model may apply post-process but others may not. + // Thus, those cases should be hanlded separately. + std::vector > inferDimInfo(outputData.dimInfo); + LOGI("inferDimInfo size: %zu", outputData.dimInfo.size()); + + std::vector inferResults(outputData.data.begin(), + outputData.data.end()); + LOGI("inferResults size: %zu", inferResults.size()); + + float *boxes = nullptr; + float *classes = nullptr; + float *scores = nullptr; + int number_of_detections = 0; + + cv::Mat cvScores, cvClasses, cvBoxes; + if (outputData.dimInfo.size() == 1) { + // there is no way to know how many objects are detect unless the number of objects aren't + // provided. In the case, each backend should provide the number of results manually. + // For example, in OpenCV, MobilenetV1-SSD doesn't provide it so the number of objects are + // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7 + // indicates the image id. But it is useless if a batch mode isn't supported. + // So, use the 1st of 7. + + number_of_detections = static_cast( + *reinterpret_cast(outputData.data[0])); + cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], + CV_32F, outputData.data[0]); + + // boxes + cv::Mat cvLeft = cvOutputData.col(3).clone(); + cv::Mat cvTop = cvOutputData.col(4).clone(); + cv::Mat cvRight = cvOutputData.col(5).clone(); + cv::Mat cvBottom = cvOutputData.col(6).clone(); + + cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight }; + cv::hconcat(cvBoxElems, 4, cvBoxes); + + // classes + cvClasses = cvOutputData.col(1).clone(); + + // scores + cvScores = cvOutputData.col(2).clone(); + + boxes = cvBoxes.ptr(0); + classes = cvClasses.ptr(0); + scores = cvScores.ptr(0); + + } else { + boxes = reinterpret_cast(inferResults[0]); + classes = reinterpret_cast(inferResults[1]); + scores = reinterpret_cast(inferResults[2]); + number_of_detections = + (int) (*reinterpret_cast(inferResults[3])); + } + + LOGI("number_of_detections = %d", number_of_detections); + + int left, top, right, bottom; + cv::Rect loc; + + ObjectDetectionResults results; + results.number_of_objects = 0; + for (int idx = 0; idx < number_of_detections; ++idx) { + if (scores[idx] < mThreshold) + continue; + + left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); + top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); + right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); + bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); + + loc.x = left; + loc.y = top; + loc.width = right - left + 1; + loc.height = bottom - top + 1; + + results.indices.push_back(static_cast(classes[idx])); + results.confidences.push_back(scores[idx]); + results.names.push_back( + mUserListName[static_cast(classes[idx])]); + results.locations.push_back(loc); + results.number_of_objects++; + + LOGI("objectClass: %d", static_cast(classes[idx])); + LOGI("confidence:%f", scores[idx]); + LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, + bottom); + } + + *detectionResults = results; + LOGI("Inference: GetObjectDetectionResults: %d\n", + results.number_of_objects); + } + + return MEDIA_VISION_ERROR_NONE; + } + + int + Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults) + { + OutputMetadata& outputMeta = mMetadata.GetOutputMeta(); + if (outputMeta.IsParsed()) { + // decoding type + auto& boxInfo = 
outputMeta.GetBox(); + auto& scoreInfo = outputMeta.GetScore(); + if (!mOutputTensorBuffers.exist(boxInfo.GetName()) || + !mOutputTensorBuffers.exist(scoreInfo.GetName())){ + LOGE("output buffers named of %s or %s are NULL", + boxInfo.GetName().c_str(), scoreInfo.GetName().c_str()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + int boxOffset = 0; + int numberOfFaces = 0; + if (boxInfo.GetDecodingType() == 0) { + std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + if (boxIndexes.size() != 1) { + LOGE("Invalid dim size. It should be 1"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + } else { + std::vector boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll(); + if (boxIndexes.size() != 1) { + LOGE("Invalid dim size. It should be 1"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]]; + + std::vector scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll(); + if (scoreIndexes.size() != 1) { + LOGE("Invaid dim size. It should be 1"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + numberOfFaces = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]]; + } + + ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset, + static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()), + static_cast(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()), + numberOfFaces); + + objDecoder.init(); + objDecoder.decode(); + FaceDetectionResults results; + results.number_of_faces = 0; + + for (auto& face : objDecoder.getObjectAll()) { + results.confidences.push_back(face.score); + results.locations.push_back(cv::Rect( + static_cast((face.location.x - face.location.width * 0.5f) * static_cast(mSourceSize.width)), + static_cast((face.location.y - face.location.height * 0.5f) * static_cast(mSourceSize.height)), + static_cast(face.location.width * static_cast(mSourceSize.width)), + static_cast(face.location.height * static_cast(mSourceSize.height)))); + results.number_of_faces++; + } + *detectionResults = results; + LOGE("Inference: GetFaceDetectionResults: %d\n", + results.number_of_faces); + } else { + tensor_t outputData; + + // Get inference result and contain it to outputData. + int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; + } + + // In case of object detection, + // a model may apply post-process but others may not. + // Thus, those cases should be handled separately. + std::vector > inferDimInfo(outputData.dimInfo); + LOGI("inferDimInfo size: %zu", outputData.dimInfo.size()); + + std::vector inferResults(outputData.data.begin(), + outputData.data.end()); + LOGI("inferResults size: %zu", inferResults.size()); + + float *boxes = nullptr; + float *classes = nullptr; + float *scores = nullptr; + int number_of_detections = 0; + + cv::Mat cvScores, cvClasses, cvBoxes; + if (outputData.dimInfo.size() == 1) { + // there is no way to know how many objects are detect unless the number of objects aren't + // provided. In the case, each backend should provide the number of results manually. + // For example, in OpenCV, MobilenetV1-SSD doesn't provide it so the number of objects are + // written to the 1st element i.e., outputData.data[0] (the shape is 1x1xNx7 and the 1st of 7 + // indicates the image id. But it is useless if a batch mode isn't supported. + // So, use the 1st of 7. 
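+			// Layout of each 7-float row in the 1x1xNx7 output, as consumed
+			// below (illustrative recap): [image_id, class, score, left, top,
+			// right, bottom]; col(2) is read as the score and cols(3..6) as
+			// the normalized box corners.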
+ + number_of_detections = static_cast( + *reinterpret_cast(outputData.data[0])); + cv::Mat cvOutputData(number_of_detections, inferDimInfo[0][3], + CV_32F, outputData.data[0]); + + // boxes + cv::Mat cvLeft = cvOutputData.col(3).clone(); + cv::Mat cvTop = cvOutputData.col(4).clone(); + cv::Mat cvRight = cvOutputData.col(5).clone(); + cv::Mat cvBottom = cvOutputData.col(6).clone(); + + cv::Mat cvBoxElems[] = { cvTop, cvLeft, cvBottom, cvRight }; + cv::hconcat(cvBoxElems, 4, cvBoxes); + + // classes + cvClasses = cvOutputData.col(1).clone(); + + // scores + cvScores = cvOutputData.col(2).clone(); + + boxes = cvBoxes.ptr(0); + classes = cvClasses.ptr(0); + scores = cvScores.ptr(0); + + } else { + boxes = reinterpret_cast(inferResults[0]); + classes = reinterpret_cast(inferResults[1]); + scores = reinterpret_cast(inferResults[2]); + number_of_detections = static_cast( + *reinterpret_cast(inferResults[3])); + } + + int left, top, right, bottom; + cv::Rect loc; + + FaceDetectionResults results; + results.number_of_faces = 0; + for (int idx = 0; idx < number_of_detections; ++idx) { + if (scores[idx] < mThreshold) + continue; + + left = static_cast(boxes[idx * 4 + 1] * mSourceSize.width); + top = static_cast(boxes[idx * 4 + 0] * mSourceSize.height); + right = static_cast(boxes[idx * 4 + 3] * mSourceSize.width); + bottom = static_cast(boxes[idx * 4 + 2] * mSourceSize.height); + + loc.x = left; + loc.y = top; + loc.width = right - left + 1; + loc.height = bottom - top + 1; + + results.confidences.push_back(scores[idx]); + results.locations.push_back(loc); + results.number_of_faces++; + + LOGI("confidence:%f", scores[idx]); + LOGI("class: %f", classes[idx]); + LOGI("left:%f, top:%f, right:%f, bottom:%f", boxes[idx * 4 + 1], + boxes[idx * 4 + 0], boxes[idx * 4 + 3], boxes[idx * 4 + 2]); + LOGI("left:%d, top:%d, right:%d, bottom:%d", left, top, right, + bottom); + } + + *detectionResults = results; + LOGE("Inference: GetFaceDetectionResults: %d\n", + results.number_of_faces); + } + + return MEDIA_VISION_ERROR_NONE; + } + + int Inference::GetFacialLandMarkDetectionResults( + FacialLandMarkDetectionResults *detectionResults) + { + tensor_t outputData; + + // Get inference result and contain it to outputData. + int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; + } + + std::vector > inferDimInfo(outputData.dimInfo); + std::vector inferResults(outputData.data.begin(), + outputData.data.end()); + + long number_of_detections = inferDimInfo[0][1]; + float *loc = reinterpret_cast(inferResults[0]); + + FacialLandMarkDetectionResults results; + results.number_of_landmarks = 0; + + cv::Point point(0, 0); + results.number_of_landmarks = 0; + LOGI("imgW:%d, imgH:%d", mSourceSize.width, mSourceSize.height); + for (int idx = 0; idx < number_of_detections; idx += 2) { + point.x = static_cast(loc[idx] * mSourceSize.width); + point.y = static_cast(loc[idx + 1] * mSourceSize.height); + + results.locations.push_back(point); + results.number_of_landmarks++; + + LOGI("x:%d, y:%d", point.x, point.y); + } + + *detectionResults = results; + LOGE("Inference: FacialLandmarkDetectionResults: %d\n", + results.number_of_landmarks); + return MEDIA_VISION_ERROR_NONE; + } + + int Inference::GetPoseLandmarkDetectionResults( + mv_inference_pose_result_h *detectionResults, int width, int height) + { + tensor_t outputData; + + // Get inference result and contain it to outputData. 
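+		// Pose decoding sketch (illustrative numbers): each landmark channel
+		// below is a heat map; cv::minMaxLoc() finds its peak, the peak is
+		// divided by the heat-map size to get normalized coordinates, and
+		// those are scaled by the caller's width/height. E.g. a peak at
+		// (24, 32) on a 48x64 map becomes (0.5, 0.5) and lands at the image
+		// center.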
+ int ret = FillOutputResult(outputData); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get output result."); + return ret; + } + + std::vector > inferDimInfo(outputData.dimInfo); + std::vector inferResults(outputData.data.begin(), + outputData.data.end()); + + long number_of_poses = 1; + long number_of_landmarks = inferDimInfo[0][3]; + float *tmp = static_cast(inferResults[0]); + cv::Size heatMapSize(inferDimInfo[0][1], inferDimInfo[0][2]); + + cv::Point loc; + cv::Point2f loc2f; + double score; + cv::Mat blurredHeatMap; + + cv::Mat reShapeTest(cv::Size(inferDimInfo[0][2], inferDimInfo[0][1]), + CV_32FC(inferDimInfo[0][3]), (void *) tmp); + + cv::Mat multiChannels[inferDimInfo[0][3]]; + split(reShapeTest, multiChannels); + + float ratioX = static_cast(inferDimInfo[0][2]); + float ratioY = static_cast(inferDimInfo[0][1]); + + if (mPoseResult == NULL) { + if(!mUserListName.empty()) { + number_of_landmarks = mUserListName.size(); + } + mPoseResult = new mv_inference_pose_s; + if (mPoseResult == NULL) { + LOGE("Fail to create result handle"); + return MEDIA_VISION_ERROR_INTERNAL; + } + + mPoseResult->number_of_poses= number_of_poses; + mPoseResult->number_of_landmarks_per_pose = number_of_landmarks; + mPoseResult->landmarks = new mv_inference_landmark_s*[number_of_poses]; + for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { + mPoseResult->landmarks[poseIndex] = new mv_inference_landmark_s[number_of_landmarks]; + for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; ++landmarkIndex) { + mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = false; + mPoseResult->landmarks[poseIndex][landmarkIndex].point.x = -1; + mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = -1; + mPoseResult->landmarks[poseIndex][landmarkIndex].label = -1; + mPoseResult->landmarks[poseIndex][landmarkIndex].score = -1.0f; + } + } + } + + int part = 0; + for (int poseIndex = 0; poseIndex < number_of_poses; ++poseIndex) { + for (int landmarkIndex = 0; landmarkIndex < number_of_landmarks; landmarkIndex++) { + part = landmarkIndex; + if (!mUserListName.empty()) { + part = std::stoi(mUserListName[landmarkIndex]) - 1; + if (part < 0) { + continue; + } + } + + cv::Mat heatMap = multiChannels[part]; + + cv::GaussianBlur(heatMap, blurredHeatMap, cv::Size(), 5.0, 5.0); + cv::minMaxLoc(heatMap, NULL, &score, NULL, &loc); + + loc2f.x = (static_cast(loc.x) / ratioX); + loc2f.y = (static_cast(loc.y) / ratioY); + LOGI("landmarkIndex[%2d] - mapping to [%2d]: x[%.3f], y[%.3f], score[%.3f]", + landmarkIndex, part, loc2f.x, loc2f.y, score); + + mPoseResult->landmarks[poseIndex][landmarkIndex].isAvailable = true; + mPoseResult->landmarks[poseIndex][landmarkIndex].point.x = static_cast(static_cast(width) * loc2f.x); + mPoseResult->landmarks[poseIndex][landmarkIndex].point.y = static_cast(static_cast(height) * loc2f.y); + mPoseResult->landmarks[poseIndex][landmarkIndex].score = score; + mPoseResult->landmarks[poseIndex][landmarkIndex].label = -1; + } + } + + *detectionResults = static_cast(mPoseResult); + + return MEDIA_VISION_ERROR_NONE; + } + +} /* Inference */ +} /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/src/InferenceIni.cpp b/mv_machine_learning/mv_inference/inference/src/InferenceIni.cpp new file mode 100644 index 00000000..7dc5fa51 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/InferenceIni.cpp @@ -0,0 +1,104 @@ +/** + * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, 
Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdlib>
+#include <string>
+#include <iniparser.h>
+
+#include "mv_private.h"
+#include "InferenceIni.h"
+
+namespace mediavision
+{
+namespace inference
+{
+	const std::string INFERENCE_INI_FILENAME =
+			"/inference/inference_engine.ini";
+
+	static inline std::string &rtrim(std::string &s,
+									 const char *t = " \t\n\r\f\v")
+	{
+		s.erase(s.find_last_not_of(t) + 1);
+		return s;
+	}
+
+	static inline std::string &ltrim(std::string &s,
+									 const char *t = " \t\n\r\f\v")
+	{
+		s.erase(0, s.find_first_not_of(t));
+		return s;
+	}
+
+	static inline std::string &trim(std::string &s,
+									const char *t = " \t\n\r\f\v")
+	{
+		return ltrim(rtrim(s, t), t);
+	}
+
+	InferenceInI::InferenceInI()
+			: mIniDefaultPath(SYSCONFDIR)
+			, mDefaultBackend("OPENCV")
+			, mDelimeter(",")
+	{
+		mIniDefaultPath += INFERENCE_INI_FILENAME;
+	}
+
+	InferenceInI::~InferenceInI()
+	{}
+
+	int InferenceInI::LoadInI()
+	{
+		LOGI("ENTER");
+		dictionary *dict = iniparser_load(mIniDefaultPath.c_str());
+		if (dict == NULL) {
+			LOGE("Fail to load ini");
+			return -1;
+		}
+
+		std::string list = std::string(iniparser_getstring(
+				dict, "inference backend:supported backend types",
+				(char *) mDefaultBackend.c_str()));
+
+		size_t pos = 0;
+		while ((pos = list.find(mDelimeter)) != std::string::npos) {
+			std::string tmp = list.substr(0, pos);
+			mSupportedInferenceBackend.push_back(atoi(tmp.c_str()));
+
+			list.erase(0, pos + mDelimeter.length());
+		}
+		mSupportedInferenceBackend.push_back(atoi(list.c_str()));
+
+		if (dict) {
+			iniparser_freedict(dict);
+			dict = NULL;
+		}
+
+		LOGI("LEAVE");
+		return 0;
+	}
+
+	void InferenceInI::UnLoadInI()
+	{
+		;
+	}
+
+	std::vector<int> InferenceInI::GetSupportedInferenceEngines()
+	{
+		return mSupportedInferenceBackend;
+	}
+
+} /* Inference */
+} /* MediaVision */
diff --git a/mv_machine_learning/mv_inference/inference/src/InputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/InputMetadata.cpp
new file mode 100644
index 00000000..28edbe34
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/src/InputMetadata.cpp
@@ -0,0 +1,234 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "mv_private.h" + +#include +#include +#include +#include +#include +#include "InputMetadata.h" +#include + +namespace mediavision +{ +namespace inference +{ + mv_colorspace_e InputMetadata::ConvertTypeToMD(const std::string& type) + { + mv_colorspace_e colorspace = MEDIA_VISION_COLORSPACE_INVALID; + if (type.empty()) { + LOGE("Invalid type[null]"); + return colorspace; + } + + if (type.compare("RGB888") == 0) { + colorspace = MEDIA_VISION_COLORSPACE_RGB888; + } else if (type.compare("Y800") == 0) { + colorspace = MEDIA_VISION_COLORSPACE_Y800; + } else { + LOGE("Not supported channel type"); + } + + return colorspace; + } + + int InputMetadata::GetTensorInfo(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "tensor_info") == false) { + LOGE("No tensor_info inputmetadata"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + // tensor_info + JsonArray * rootArray = json_object_get_array_member(root, "tensor_info"); + unsigned int elements = json_array_get_length(rootArray); + + std::map().swap(layer); + // TODO: handling error + // FIXEME: LayerInfo.set()?? + for (unsigned int elem = 0; elem < elements; ++elem) { + LayerInfo info; + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + info.name = + static_cast(json_object_get_string_member(pObject,"name")); + LOGI("layer: %s", info.name.c_str()); + + info.shapeType = + static_cast(json_object_get_int_member(pObject, "shape_type")); + LOGI("shape type: %d:%s", info.shapeType, info.shapeType == 0 ? "NCHW" : "NHWC"); + + info.dataType = + static_cast(json_object_get_int_member(pObject, "data_type")); + LOGI("data type : %d:%s", info.dataType, info.dataType == 0 ? "FLOAT32" : "UINT8"); + + const char *colorSpace = static_cast(json_object_get_string_member(pObject,"color_space")); + info.colorSpace = ConvertTypeToMD(std::string(colorSpace)); + LOGI("color space : %d:%s", info.colorSpace, info.colorSpace == MEDIA_VISION_COLORSPACE_RGB888 ? "RGB888" : ""); + + // dims + JsonArray * array = json_object_get_array_member(pObject, "shape_dims"); + unsigned int elements2 = json_array_get_length(array); + LOGI("shape dim: size[%u]", elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + auto dim = static_cast(json_array_get_int_element(array, elem2)); + info.dims.push_back(dim); + LOGI("%d", dim); + } + + layer.insert(std::make_pair(info.name, info)); + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int InputMetadata::GetPreProcess(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "preprocess") == false) { + LOGI("No preprocess inputmetadata"); + return MEDIA_VISION_ERROR_NONE; + } + + // preprocess + JsonArray * rootArray = json_object_get_array_member(root, "preprocess"); + unsigned int elements = json_array_get_length(rootArray); + + std::map().swap(option); + // TODO: iterLayer should be the same with elements. 
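+		// Expected "preprocess" JSON shape (illustrative; field names taken
+		// from the parsing below):
+		//   "preprocess" : [ {
+		//       "normalization" : [ { "mean" : [127.5], "std" : [127.5] } ],
+		//       "quantization"  : [ { "scale" : [1.0], "zeropoint" : [0.0] } ]
+		//   } ]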
+ auto iterLayer = layer.begin(); + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem, ++iterLayer) { + Options opt; + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + // normalization + if (json_object_has_member(pObject, "normalization")) { + JsonArray * array = json_object_get_array_member(pObject, "normalization"); + JsonNode * node = json_array_get_element(array, 0); + JsonObject * object = json_node_get_object(node); + + opt.normalization.use = true; + LOGI("use normalization"); + + JsonArray * arrayMean = json_object_get_array_member(object, "mean"); + JsonArray * arrayStd = json_object_get_array_member(object, "std"); + unsigned int elemMean = json_array_get_length(arrayMean); + unsigned int elemStd = json_array_get_length(arrayStd); + if (elemMean != elemStd) { + LOGE("Invalid mean and std values"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + for (unsigned int elem = 0; elem < elemMean; ++elem) { + auto m = static_cast(json_array_get_double_element(arrayMean, elem)); + auto s = static_cast(json_array_get_double_element(arrayStd, elem)); + opt.normalization.mean.push_back(m); + opt.normalization.std.push_back(s); + LOGI("%u: mean[%3.2f], std[%3.2f]", elem, m, s); + } + } + + if (json_object_has_member(pObject, "quantization")) { + JsonArray * array = json_object_get_array_member(pObject, "quantization"); + JsonNode * node = json_array_get_element(array, 0); + JsonObject * object = json_node_get_object(node); + + opt.quantization.use = true; + LOGI("use quantization"); + + JsonArray * arrayScale = json_object_get_array_member(object, "scale"); + JsonArray * arrayZero = json_object_get_array_member(object, "zeropoint"); + unsigned int elemScale = json_array_get_length(arrayScale); + unsigned int elemZero= json_array_get_length(arrayZero); + if (elemScale != elemZero) { + LOGE("Invalid scale and zero values"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + for (unsigned int elem = 0; elem < elemScale; ++elem) { + auto s = static_cast(json_array_get_double_element(arrayScale, elem)); + auto z = static_cast(json_array_get_double_element(arrayZero, elem)); + opt.quantization.scale.push_back(s); + opt.quantization.zeropoint.push_back(z); + LOGI("%u: scale[%3.2f], zeropoint[%3.2f]", elem, s, z); + } + } + option.insert(std::make_pair(iterLayer->first, opt)); + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int InputMetadata::Parse(JsonObject *root) + { + LOGI("ENTER"); + + int ret = GetTensorInfo(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetTensorInfo[%d]", ret); + return ret; + } + + ret = GetPreProcess(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetPreProcess[%d]", ret); + return ret; + } + + parsed = true; + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int LayerInfo::GetWidth() const { + if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { + return dims[3]; + } else { // INFERENCE_TENSOR_SHAPE_NWHC + return dims[1]; + } + } + + int LayerInfo::GetHeight() const { + if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { + return dims[2]; + } else { // INFERENCE_TENSOR_SHAPE_NWHC + return dims[2]; + } + } + + int LayerInfo::GetChannel() const { + if (shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { + return dims[1]; + } else { // INFERENCE_TENSOR_SHAPE_NWHC + return dims[3]; + } + } + +} /* Inference */ +} /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/src/Metadata.cpp 
b/mv_machine_learning/mv_inference/inference/src/Metadata.cpp new file mode 100644 index 00000000..b2ae9ffd --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/Metadata.cpp @@ -0,0 +1,121 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mv_private.h" +#include "Metadata.h" + +#include + +#include +#include +#include +#include +#include + +namespace mediavision +{ +namespace inference +{ + int Metadata::Init(const std::string& filename) + { + LOGI("ENTER"); + + if (access(filename.c_str(), F_OK | R_OK)) { + LOGE("meta file is in [%s] ", filename.c_str()); + return MEDIA_VISION_ERROR_INVALID_PATH; + } + + mMetafile = filename; + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int Metadata::Parse() + { + LOGI("ENTER"); + + if (mMetafile.empty()) { + LOGE("meta file is empty"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + GError *error = NULL; + JsonNode *node = NULL; + JsonObject *object = NULL; + int ret = MEDIA_VISION_ERROR_NONE; + + JsonParser *parser = json_parser_new(); + if (parser == NULL) { + LOGE("Fail to create json parser"); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; + } + + gboolean jsonRet = json_parser_load_from_file(parser, mMetafile.c_str(), &error); + if (!jsonRet) { + LOGE("Unable to parser file %s by %s", mMetafile.c_str(), + error == NULL ? "Unknown" : error->message); + g_error_free(error); + ret = MEDIA_VISION_ERROR_INVALID_DATA; + goto _ERROR_; + } + + node = json_parser_get_root(parser); + if (JSON_NODE_TYPE(node) != JSON_NODE_OBJECT) { + LOGE("Fail to json_parser_get_root. It's an incorrect markup"); + ret = MEDIA_VISION_ERROR_INVALID_DATA; + goto _ERROR_; + } + + object = json_node_get_object(node); + if (!object) { + LOGE("Fail to json_node_get_object. 
object is NULL");
+		ret = MEDIA_VISION_ERROR_INVALID_DATA;
+		goto _ERROR_;
+	}
+
+	ret = mInputMeta.Parse(json_object_get_object_member(object, "inputmetadata"));
+	if (ret != MEDIA_VISION_ERROR_NONE) {
+		LOGE("Fail to parse input metadata[%d]", ret);
+		goto _ERROR_;
+	}
+
+	ret = mOutputMeta.Parse(json_object_get_object_member(object, "outputmetadata"));
+	if (ret != MEDIA_VISION_ERROR_NONE) {
+		LOGE("Fail to parse output metadata[%d]", ret);
+		goto _ERROR_;
+	}
+
+_ERROR_:
+	g_object_unref(parser);
+	parser = NULL;
+	LOGI("LEAVE");
+
+	return ret;
+}
+
+InputMetadata& Metadata::GetInputMeta()
+{
+	return mInputMeta;
+}
+
+OutputMetadata& Metadata::GetOutputMeta()
+{
+	return mOutputMeta;
+}
+} /* Inference */
+} /* MediaVision */
diff --git a/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp
new file mode 100755
index 00000000..b4da65f6
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/src/ObjectDecoder.cpp
@@ -0,0 +1,196 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+#include "ObjectDecoder.h"
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+namespace mediavision
+{
+namespace inference
+{
+	int ObjectDecoder::init()
+	{
+		if (mBoxInfo.GetDecodingType() == 0) {
+			if (!mTensorBuffer.exist(mMeta.GetLabel().GetName()) ||
+				!mTensorBuffer.exist(mMeta.GetNumber().GetName())) {
+				LOGE("Tensor buffers named %s or %s do not exist",
+					mMeta.GetLabel().GetName().c_str(),
+					mMeta.GetNumber().GetName().c_str());
+				return MEDIA_VISION_ERROR_INVALID_OPERATION;
+			}
+
+			std::vector<int> indexes = mMeta.GetNumber().GetDimInfo().GetValidIndexAll();
+			if (indexes.size() != 1) {
+				LOGE("Invalid dim size. It should be 1");
+				return MEDIA_VISION_ERROR_INVALID_OPERATION;
+			}
+
+			// When GetDecodingType() == 0, mNumberOfOjects has to be read back
+			// from the number tensor; otherwise it was already set in the ctor.
+			mNumberOfOjects = mTensorBuffer.getValue<int>(
+								mMeta.GetNumber().GetName(), indexes[0]);
+		} else {
+			if (mBoxInfo.GetDecodeInfo().IsAnchorBoxEmpty()) {
+				LOGE("Anchor boxes are required but empty.");
+				return MEDIA_VISION_ERROR_INVALID_OPERATION;
+			}
+		}
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	float ObjectDecoder::decodeScore(int idx)
+	{
+		float score = mTensorBuffer.getValue<float>(mScoreInfo.GetName(), idx);
+		if (mScoreInfo.GetType() == 1) {
+			score = PostProcess::sigmoid(score);
+		}
+
+		return score < mScoreInfo.GetThresHold() ?
0.0f : score; + } + + Box ObjectDecoder::decodeBox(int idx, float score, int label) + { + // assume type is (cx,cy,w,h) + // left or cx + float cx = mTensorBuffer.getValue(mBoxInfo.GetName(), + idx * mBoxOffset + mBoxInfo.GetOrder()[0]); + // top or cy + float cy = mTensorBuffer.getValue(mBoxInfo.GetName(), + idx * mBoxOffset + mBoxInfo.GetOrder()[1]); + // right or width + float cWidth = mTensorBuffer.getValue(mBoxInfo.GetName(), + idx * mBoxOffset + mBoxInfo.GetOrder()[2]); + // bottom or height + float cHeight = mTensorBuffer.getValue(mBoxInfo.GetName(), + idx * mBoxOffset + mBoxInfo.GetOrder()[3]); + + // when GetType() == 0 (l,t,r,b), then convert it to (cx,cy,w,h) + if (mBoxInfo.GetType() == 0) { + float tmpCx = cx; + float tmpCy = cy; + cx = (cx + cWidth) * 0.5f; // (left + right)/2 + cy = (cy + cHeight) * 0.5f; // (top + bottom)/2 + cWidth = cWidth - tmpCx ; // right - left + cHeight = cHeight - tmpCy; // bottom - top + } + + // normalize if GetCoordinate() == 1 which is pixel coordinate (absolute) + if (mBoxInfo.GetCoordinate() == 1) { + cx /= mScaleW; + cy /= mScaleH; + cWidth /= mScaleW; + cHeight /= mScaleH; + } + + Box box = { + .index = mMeta.GetLabel().GetName().empty() ? + label : + mTensorBuffer.getValue(mMeta.GetLabel().GetName(), idx), + .score = score, + .location = cv::Rect2f(cx, cy, cWidth, cHeight) + }; + + return box; + } + + Box ObjectDecoder::decodeBoxWithAnchor(int idx, int anchorIdx, float score, cv::Rect2f& anchor) + { + // location coordinate of box, the output of decodeBox(), is relative between 0 ~ 1 + Box box = decodeBox(anchorIdx, score, idx); + + if (mBoxInfo.GetDecodeInfo().IsFixedAnchorSize()) { + box.location.x += anchor.x; + box.location.y += anchor.y; + } else { + box.location.x = box.location.x / mBoxInfo.GetDecodeInfo().GetAnchorXscale() * + anchor.width + anchor.x; + box.location.y = box.location.y / mBoxInfo.GetDecodeInfo().GetAnchorYscale() * + anchor.height + anchor.y; + } + + if (mBoxInfo.GetDecodeInfo().IsExponentialBoxScale()) { + box.location.width = anchor.width * + std::exp(box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale()); + box.location.height = anchor.height * + std::exp(box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale()); + } else { + box.location.width = anchor.width * + box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale(); + box.location.height = anchor.height * + box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale(); + } + + return box; + } + + int ObjectDecoder::decode() + { + BoxesList boxList; + int ret = MEDIA_VISION_ERROR_NONE; + + for (int idx = 0; idx < mNumberOfOjects; ++idx) { + if (mBoxInfo.GetType() == 0) { + float score = decodeScore(idx); + if (score <= 0.0f) + continue; + + Box box = decodeBox(idx, score); + mResultBoxes.push_back(box); + } else { + int anchorIdx = -1; + + Boxes boxes; + for (auto& anchorBox : mBoxInfo.GetDecodeInfo().GetAnchorBoxAll()) { + anchorIdx++; + + float score = decodeScore(anchorIdx * mNumberOfOjects + idx); + + if (score <= 0.0f) + continue; + + Box box = decodeBoxWithAnchor(idx, anchorIdx, score, anchorBox); + boxes.push_back(box); + } + boxList.push_back(boxes); + } + } + + if (!boxList.empty()) { + PostProcess postProc; + ret = postProc.Nms(boxList, + mBoxInfo.GetDecodeInfo().GetNmsMode(), + mBoxInfo.GetDecodeInfo().GetNmsThreshold(), + mResultBoxes); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to non-maximum suppression[%d]", ret); + return ret; + } + } + + return ret; + } + + Boxes& ObjectDecoder::getObjectAll() + { + return 
mResultBoxes;
+	}
+}
+}
diff --git a/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
new file mode 100755
index 00000000..cbfe6ad6
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/src/OutputMetadata.cpp
@@ -0,0 +1,612 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+
+#include <string>
+#include <vector>
+#include <memory>
+#include <cmath>
+#include <algorithm>
+#include "OutputMetadata.h"
+
+namespace mediavision
+{
+namespace inference
+{
+	int ScoreInfo::ParseScore(JsonObject *root)
+	{
+		LOGI("ENTER");
+
+		JsonArray * rootArray = json_object_get_array_member(root, "score");
+		unsigned int elements = json_array_get_length(rootArray);
+
+		for (unsigned int elem = 0; elem < elements; ++elem) {
+			JsonNode *pNode = json_array_get_element(rootArray, elem);
+			JsonObject *pObject = json_node_get_object(pNode);
+
+			name = json_object_get_string_member(pObject, "name");
+			LOGI("layer: %s", name.c_str());
+
+			JsonArray * array = json_object_get_array_member(pObject, "index");
+			unsigned int elements2 = json_array_get_length(array);
+			LOGI("range dim: size[%u]", elements2);
+			for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+				if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+					dimInfo.SetValidIndex(elem2);
+			}
+
+			topNumber = static_cast<int>(json_object_get_int_member(pObject, "top_number"));
+			LOGI("top number: %d", topNumber);
+
+			threshold = static_cast<double>(json_object_get_double_member(pObject, "threshold"));
+			LOGI("threshold: %1.3f", threshold);
+
+			type = static_cast<int>(json_object_get_int_member(pObject, "score_type"));
+			LOGI("score type: %d", type);
+
+			if (json_object_has_member(pObject, "dequantization")) {
+				array = json_object_get_array_member(pObject, "dequantization");
+				JsonNode *node = json_array_get_element(array, 0);
+				JsonObject *object = json_node_get_object(node);
+
+				deQuantization = std::make_shared<DeQuantization>(
+						json_object_get_double_member(object, "scale"),
+						json_object_get_double_member(object, "zeropoint"));
+			}
+		}
+
+		LOGI("LEAVE");
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int OutputMetadata::ParseScore(JsonObject *root)
+	{
+		LOGI("ENTER");
+
+		if (json_object_has_member(root, "score") == false) {
+			LOGI("No score outputmetadata");
+			LOGI("LEAVE");
+			return MEDIA_VISION_ERROR_NONE;
+		}
+
+		score.ParseScore(root);
+
+		LOGI("LEAVE");
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int BoxInfo::ParseBox(JsonObject *root)
+	{
+		LOGI("ENTER");
+
+		JsonArray * rootArray = json_object_get_array_member(root, "box");
+		unsigned int elements = json_array_get_length(rootArray);
+
+		for (unsigned int elem = 0; elem < elements; ++elem) {
+			JsonNode *pNode = json_array_get_element(rootArray, elem);
+			JsonObject *pObject = json_node_get_object(pNode);
+
+			name = json_object_get_string_member(pObject, "name");
+			LOGI("layer: %s", name.c_str());
+
+			JsonArray * array = json_object_get_array_member(pObject, "index");
+			unsigned int elements2 = json_array_get_length(array);
+			LOGI("range dim: size[%u]", elements2);
+			for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+				if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+					dimInfo.SetValidIndex(elem2);
+			}
+
+			type = static_cast<int>(json_object_get_int_member(pObject, "box_type"));
+			LOGI("box type: %d", type);
+
+			array = json_object_get_array_member(pObject, "box_order");
+			elements2 = json_array_get_length(array);
+			LOGI("box order should have 4 elements but has [%u]", elements2);
+			for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+				auto val = static_cast<int>(json_array_get_int_element(array, elem2));
+				order.push_back(val);
+				LOGI("%d", val);
+			}
+
+			coordinate = static_cast<int>(json_object_get_int_member(pObject, "box_coordinate"));
+			LOGI("box coordinate: %d", coordinate);
+
+			decodingType = static_cast<int>(json_object_get_int_member(pObject, "decoding_type"));
+			LOGI("box decoding type: %d", decodingType);
+		}
+
+		LOGI("LEAVE");
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	void BoxInfo::DecodeInfo::AddAnchorBox(cv::Rect2f& anchor)
+	{
+		anchorBoxes.push_back(anchor);
+	}
+
+	void BoxInfo::DecodeInfo::ClearAnchorBox()
+	{
+		anchorBoxes.clear();
+	}
+
+	std::vector<cv::Rect2f>& BoxInfo::DecodeInfo::GetAnchorBoxAll()
+	{
+		return anchorBoxes;
+	}
+
+	bool BoxInfo::DecodeInfo::IsAnchorBoxEmpty()
+	{
+		return anchorBoxes.empty();
+	}
+
+	int OutputMetadata::ParseBox(JsonObject *root)
+	{
+		LOGI("ENTER");
+
+		if (json_object_has_member(root, "box") == false) {
+			LOGE("No box outputmetadata");
+			LOGI("LEAVE");
+			return MEDIA_VISION_ERROR_NONE;
+		}
+
+		box.ParseBox(root);
+
+		LOGI("LEAVE");
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int Label::ParseLabel(JsonObject *root)
+	{
+		LOGI("ENTER");
+
+		JsonArray * rootArray = json_object_get_array_member(root, "label");
+		unsigned int elements = json_array_get_length(rootArray);
+
+		// TODO: handling error
+		for (unsigned int elem = 0; elem < elements; ++elem) {
+			JsonNode *pNode = json_array_get_element(rootArray, elem);
+			JsonObject *pObject = json_node_get_object(pNode);
+
+			name = json_object_get_string_member(pObject, "name");
+			LOGI("layer: %s", name.c_str());
+
+			JsonArray * array = json_object_get_array_member(pObject, "index");
+			unsigned int elements2 = json_array_get_length(array);
+			LOGI("range dim: size[%u]", elements2);
+			for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
+				if (static_cast<int>(json_array_get_int_element(array, elem2)) == 1)
+					dimInfo.SetValidIndex(elem2);
+			}
+		}
+
+		LOGI("LEAVE");
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int OutputMetadata::ParseLabel(JsonObject *root)
+	{
+		LOGI("ENTER");
+
+		if (json_object_has_member(root, "label") == false) {
+			LOGE("No label outputmetadata");
+			LOGI("LEAVE");
+			return MEDIA_VISION_ERROR_INVALID_OPERATION;
+		}
+
+		label.ParseLabel(root);
+
+		LOGI("LEAVE");
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int Number::ParseNumber(JsonObject *root)
+	{
+		// number
+		JsonArray * rootArray = json_object_get_array_member(root, "number");
+		unsigned int elements = json_array_get_length(rootArray);
+
+		// TODO: handling error
+		for (unsigned int elem = 0; elem < elements; ++elem) {
+			JsonNode *pNode = json_array_get_element(rootArray, elem);
+			JsonObject *pObject = json_node_get_object(pNode);
+
+			name = json_object_get_string_member(pObject, "name");
+			LOGI("layer: %s", name.c_str());
+
+			JsonArray * array = json_object_get_array_member(pObject, "index");
+			unsigned int elements2 = json_array_get_length(array);
+			LOGI("range dim: size[%u]",
elements2); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + if (static_cast(json_array_get_int_element(array, elem2)) == 1) + dimInfo.SetValidIndex(elem2); + } + } + + return MEDIA_VISION_ERROR_NONE; + } + + int OutputMetadata::ParseNumber(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "number") == false) { + LOGE("No number outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + number.ParseNumber(root); + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + int OutputMetadata::ParseBoxDecodeInfo(JsonObject *root) + { + LOGI("ENTER"); + + if (json_object_has_member(root, "box") == false) { + LOGE("No box outputmetadata"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + // box + JsonArray * rootArray = json_object_get_array_member(root, "box"); + unsigned int elements = json_array_get_length(rootArray); + + // TODO: handling error + for (unsigned int elem = 0; elem < elements; ++elem) { + JsonNode *pNode = json_array_get_element(rootArray, elem); + JsonObject *pObject = json_node_get_object(pNode); + + if (json_object_has_member(pObject, "decoding_info") == false) { + LOGE("decoding_info is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + JsonObject *cObject = json_object_get_object_member(pObject, "decoding_info"); + if (json_object_has_member(cObject, "anchor") == false) { + LOGE("anchor is mandatory. Invalid metadata"); + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + int ret = box.GetDecodeInfo().ParseAnchorParam(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseAnchorParam[%d]", ret); + return ret; + } + + ret = box.GetDecodeInfo().ParseNms(cObject); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to ParseNms[%d]", ret); + return ret; + } + } + + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + + } + + int BoxInfo::DecodeInfo::ParseAnchorParam(JsonObject *root) + { + JsonObject *object = json_object_get_object_member(root, "anchor") ; + + this->anchorParam.mode = static_cast(json_object_get_int_member(object, "mode")); + + this->anchorParam.numLayers = static_cast(json_object_get_int_member(object, "num_layers")); + this->anchorParam.minScale = static_cast(json_object_get_double_member(object, "min_scale")); + this->anchorParam.maxScale = static_cast(json_object_get_double_member(object, "max_scale")); + this->anchorParam.inputSizeHeight = static_cast(json_object_get_int_member(object, "input_size_height")); + this->anchorParam.inputSizeWidth = static_cast(json_object_get_int_member(object, "input_size_width")); + this->anchorParam.anchorOffsetX = static_cast(json_object_get_double_member(object, "anchor_offset_x")); + this->anchorParam.anchorOffsetY = static_cast(json_object_get_double_member(object, "anchor_offset_y")); + this->anchorParam.isReduceBoxedInLowestLayer = + static_cast(json_object_get_boolean_member(object, "reduce_boxed_in_lowest_layer")); + this->anchorParam.interpolatedScaleAspectRatio = + static_cast(json_object_get_double_member(object, "interpolated_scale_aspect_ratio")); + this->anchorParam.isFixedAnchorSize = + static_cast(json_object_get_boolean_member(object, "fixed_anchor_size")); + this->anchorParam.isExponentialBoxScale = + static_cast(json_object_get_boolean_member(object, "exponential_box_scale")); + + this->anchorParam.xScale = static_cast(json_object_get_double_member(object, "x_scale")); + this->anchorParam.yScale = 
static_cast(json_object_get_double_member(object, "y_scale")); + this->anchorParam.wScale = static_cast(json_object_get_double_member(object, "w_scale")); + this->anchorParam.hScale = static_cast(json_object_get_double_member(object, "h_scale")); + + JsonArray * array = json_object_get_array_member(object, "strides"); + unsigned int elements2 = json_array_get_length(array); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + auto stride = static_cast(json_array_get_int_element(array, elem2)); + this->anchorParam.strides.push_back(stride); + LOGI("stride: %d", stride); + } + + array = json_object_get_array_member(object, "aspect_ratios"); + elements2 = json_array_get_length(array); + for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) { + auto aspectRatio = static_cast(json_array_get_double_element(array, elem2)); + this->anchorParam.aspectRatios.push_back(aspectRatio); + LOGI("aspectRatio: %.4f", aspectRatio); + } + + return MEDIA_VISION_ERROR_NONE; + } + + float BoxInfo::DecodeInfo::CalculateScale(float min, float max, int index, int maxStride) + { + return min + (max - min) * 1.0 * index / (maxStride - 1.0f); + } + + bool BoxInfo::DecodeInfo::IsFixedAnchorSize() + { + return this->anchorParam.isFixedAnchorSize;; + } + + bool BoxInfo::DecodeInfo::IsExponentialBoxScale() + { + return this->anchorParam.isExponentialBoxScale; + } + + float BoxInfo::DecodeInfo::GetAnchorXscale() + { + return this->anchorParam.xScale; + } + + float BoxInfo::DecodeInfo::GetAnchorYscale() + { + return this->anchorParam.yScale; + } + + float BoxInfo::DecodeInfo::GetAnchorWscale() + { + return this->anchorParam.wScale; + } + + float BoxInfo::DecodeInfo::GetAnchorHscale() + { + return this->anchorParam.hScale; + } + + int BoxInfo::DecodeInfo::GenerateAnchor() + { + //BoxInfo::DecodeInfo& decodeInfo = box.GetDecodeInfo(); + + if (this->anchorParam.strides.empty() || + this->anchorParam.aspectRatios.empty()) { + LOGE("Invalid anchor parameters"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + int layerId = 0; + this->ClearAnchorBox(); + while (layerId < this->anchorParam.numLayers) { + std::vector anchorHeight; + std::vector anchorWidth; + std::vector aspectRatios; + std::vector scales; + + int lastSameStrideLayer = layerId; + std::vector::iterator iter1, iter2; + while ((lastSameStrideLayer < this->anchorParam.numLayers) && + (this->anchorParam.strides[lastSameStrideLayer] == + this->anchorParam.strides[layerId])) { + const float scale = CalculateScale(this->anchorParam.minScale, + this->anchorParam.maxScale, + lastSameStrideLayer, + this->anchorParam.strides.size()); + + if (lastSameStrideLayer == 0 && + this->anchorParam.isReduceBoxedInLowestLayer) { + aspectRatios.push_back(1.0); + aspectRatios.push_back(2.0); + aspectRatios.push_back(0.5); + scales.push_back(0.1); + scales.push_back(scale); + scales.push_back(scale); + } else { + for (iter1 = this->anchorParam.aspectRatios.begin(); + iter1 != this->anchorParam.aspectRatios.end(); + ++iter1) { + aspectRatios.push_back((*iter1)); + scales.push_back(scale); + } + if (this->anchorParam.interpolatedScaleAspectRatio > 0.0f) { + const float scaleNext = + lastSameStrideLayer == static_cast(this->anchorParam.strides.size()) -1 + ? 
1.0f + : CalculateScale(this->anchorParam.minScale, + this->anchorParam.maxScale, + lastSameStrideLayer + 1, + this->anchorParam.strides.size()); + scales.push_back(std::sqrt(scale * scaleNext)); + aspectRatios.push_back(this->anchorParam.interpolatedScaleAspectRatio); + } + } + lastSameStrideLayer++; + } + + for (iter1 = aspectRatios.begin(), iter2 = scales.begin(); + (iter1 != aspectRatios.end() && iter2 != scales.end()); + ++iter1, ++iter2) { + const float ratioSqrts = std::sqrt((*iter1)); + anchorHeight.push_back((*iter2) / ratioSqrts); + anchorWidth.push_back((*iter2) * ratioSqrts); + } + + const int stride = this->anchorParam.strides[layerId]; + int featureMapHeight = std::ceil(1.0f * this->anchorParam.inputSizeHeight / stride); + int featureMapWidth = std::ceil(1.0f * this->anchorParam.inputSizeWidth / stride); + + for (int y = 0; y < featureMapHeight; ++y) { + for (int x = 0; x < featureMapWidth; ++x) { + for (int anchorId = 0; anchorId < (int)anchorHeight.size(); ++anchorId) { + cv::Rect2f anchor = { + cv::Point2f { + (x + this->anchorParam.anchorOffsetX) * 1.0f / featureMapWidth, + (y + this->anchorParam.anchorOffsetY) * 1.0f / featureMapHeight + }, + this->anchorParam.isFixedAnchorSize ? + cv::Size2f {1.0f, 1.0f} : + cv::Size2f {anchorWidth[anchorId], anchorWidth[anchorId]} + }; + this->AddAnchorBox(anchor); + } + } + } + layerId = lastSameStrideLayer; + } + + if (this->IsAnchorBoxEmpty()) { + LOGE("Anchor boxes are empty"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + return MEDIA_VISION_ERROR_NONE; + } + + + int BoxInfo::DecodeInfo::ParseNms(JsonObject *root) + { + if (json_object_has_member(root, "nms") == false) { + LOGI("nms is empty. skip it"); + return MEDIA_VISION_ERROR_NONE; + } + + JsonObject *object = json_object_get_object_member(root, "nms"); + this->nmsParam.mode = static_cast(json_object_get_int_member(object, "mode")); + this->nmsParam.threshold = static_cast(json_object_get_double_member(object,"threshold")); + + return MEDIA_VISION_ERROR_NONE; + } + + int BoxInfo::DecodeInfo::GetNmsMode() + { + return this->nmsParam.mode; + } + + float BoxInfo::DecodeInfo::GetNmsThreshold() + { + return this->nmsParam.threshold; + } + + ScoreInfo& OutputMetadata::GetScore() + { + return score; + } + + BoxInfo& OutputMetadata::GetBox() + { + return box; + } + + Label& OutputMetadata::GetLabel() + { + return label; + } + + Number& OutputMetadata::GetNumber() + { + return number; + } + + bool OutputMetadata::IsParsed() + { + return parsed; + } + + int OutputMetadata::Parse(JsonObject *root) + { + LOGI("ENTER"); + + int ret = ParseScore(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetScore[%d]", ret); + return ret; + } + + ret = ParseBox(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetBox[%d]", ret); + return ret; + } + + if (!box.GetName().empty()) { + // addtional parsing is required according to decoding type + if (box.GetDecodingType() == 0) { + + ret = ParseLabel(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetLabel[%d]", ret); + return ret; + } + + ret = ParseNumber(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetNumber[%d]", ret); + return ret; + } + + } else if (box.GetDecodingType() == 1) { + ret = ParseBoxDecodeInfo(root); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GetBoxDecodeInfo[%d]", ret); + return ret; + } + + ret = box.GetDecodeInfo().GenerateAnchor(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to GenerateAnchor[%d]", ret); + return ret; + } + + } else { + 
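+			// decoding_type 0 decodes boxes directly and relies on the label and
+			// number tensors parsed above; 1 decodes against the generated anchors.
+			// Any other value is tolerated: the box output is simply left undecoded.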
LOGW("Unknown box decoding type. Ignore");
+			}
+		}
+
+		parsed = true;
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	void DimInfo::SetValidIndex(int index)
+	{
+		LOGI("ENTER");
+
+		dims.push_back(index);
+
+		LOGI("LEAVE");
+	}
+
+	std::vector<int> DimInfo::GetValidIndexAll() const
+	{
+		LOGI("ENTER");
+
+		LOGI("LEAVE");
+		return dims;
+	}
+} /* Inference */
+} /* MediaVision */
diff --git a/mv_machine_learning/mv_inference/inference/src/PostProcess.cpp b/mv_machine_learning/mv_inference/inference/src/PostProcess.cpp
new file mode 100755
index 00000000..00059b45
--- /dev/null
+++ b/mv_machine_learning/mv_inference/inference/src/PostProcess.cpp
@@ -0,0 +1,173 @@
+/**
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mv_private.h"
+
+#include <cmath>
+#include <queue>
+#include <vector>
+#include <algorithm>
+#include "PostProcess.h"
+
+namespace mediavision
+{
+namespace inference
+{
+	float PostProcess::sigmoid(float value)
+	{
+		return 1.0 / (1.0 + exp(-value));
+	}
+
+	float PostProcess::dequant(float value, float scale, float zeropoint)
+	{
+		LOGI("ENTER");
+
+		LOGI("LEAVE");
+		return value / scale + zeropoint;
+	}
+
+	int PostProcess::ScoreClear(int size)
+	{
+		LOGI("ENTER");
+
+		std::priority_queue<std::pair<float, int>,
+				std::vector<std::pair<float, int>>,
+				std::greater<std::pair<float, int>>>().swap(mScore);
+		mMaxScoreSize = size;
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int PostProcess::ScorePush(float value, int index)
+	{
+		LOGI("ENTER");
+
+		mScore.push(std::pair<float, int>(value, index));
+		if (mScore.size() > static_cast<size_t>(mMaxScoreSize)) {
+			mScore.pop();
+		}
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	int PostProcess::ScorePop(std::vector<std::pair<float, int>>& top)
+	{
+		LOGI("ENTER");
+
+		top.clear();
+		while (!mScore.empty()) {
+			top.push_back(mScore.top());
+			LOGI("%.3f", mScore.top().first);
+			mScore.pop();
+		}
+
+		std::reverse(top.begin(), top.end());
+
+		LOGI("LEAVE");
+
+		return MEDIA_VISION_ERROR_NONE;
+	}
+
+	static bool compareScore(Box box0, Box box1)
+	{
+		return box0.score > box1.score;
+	}
+
+	static float calcIntersectionOverUnion(Box box0, Box box1)
+	{
+		float area0 = box0.location.width * box0.location.height;
+		float area1 = box1.location.width * box1.location.height;
+
+		if (area0 <= 0.0f || area1 <= 0.0f)
+			return 0.0f;
+
+		float sx0 = box0.location.x - box0.location.width * 0.5f;
+		float sy0 = box0.location.y - box0.location.height * 0.5f;
+		float ex0 = box0.location.x + box0.location.width * 0.5f;
+		float ey0 = box0.location.y + box0.location.height * 0.5f;
+		float sx1 = box1.location.x - box1.location.width * 0.5f;
+		float sy1 = box1.location.y - box1.location.height * 0.5f;
+		float ex1 = box1.location.x + box1.location.width * 0.5f;
+		float ey1 = box1.location.y + box1.location.height * 0.5f;
+
+		float xmin0 = std::min(sx0, ex0);
+		float ymin0 = std::min(sy0, ey0);
+		float xmax0 = std::max(sx0, ex0);
+		float ymax0 = std::max(sy0, ey0);
+		float xmin1 = std::min(sx1, ex1);
+		float ymin1 = std::min(sy1, ey1);
+		float xmax1 = std::max(sx1, ex1);
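+		// sx/ex are centre +/- half-extent; the min/max calls re-sort them so
+		// the overlap math below still works if a width or height is negative.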
float ymax1 = std::max (sy1, ey1); + + float intersectXmin = std::max(xmin0, xmin1); + float intersectYmin = std::max(ymin0, ymin1); + float intersectXmax = std::min(xmax0, xmax1); + float intersectYmax = std::min(ymax0, ymax1); + + float intersectArea = std::max((intersectYmax - intersectYmin), 0.0f) * + std::max((intersectXmax - intersectXmin), 0.0f); + return intersectArea / (area0 + area1 - intersectArea); + } + + int PostProcess::Nms(BoxesList& boxeslist, int mode, float threshold, Boxes& nmsboxes) + { + LOGI("ENTER"); + + if (mode != 0) { + LOGI("Skip Nms"); + LOGI("LEAVE"); + return MEDIA_VISION_ERROR_NONE; + } + + LOGI("threshold: %.3f", threshold); + bool isIgnore = false; + Boxes candidateBoxes; + for (auto& boxList : boxeslist) { + if (boxList.size() <=0 ) + continue; + + std::sort(boxList.begin(), boxList.end(), compareScore); + candidateBoxes.clear(); + for (auto& decodedBox : boxList) { + isIgnore = false; + for (auto candidateBox = candidateBoxes.rbegin(); candidateBox != candidateBoxes.rend(); ++candidateBox) { + // compare decodedBox with previous one + float iouValue = calcIntersectionOverUnion(decodedBox, (*candidateBox)); + LOGI("iouValue: %.3f", iouValue); + if (iouValue >= threshold) { + isIgnore = true; + break; + } + } + if (!isIgnore) { + candidateBoxes.push_back(decodedBox); + } + } + if (candidateBoxes.size() > 0) { + nmsboxes.insert(nmsboxes.begin(), candidateBoxes.begin(), candidateBoxes.end()); + } + } + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } +} /* Inference */ +} /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/src/Posture.cpp b/mv_machine_learning/mv_inference/inference/src/Posture.cpp new file mode 100644 index 00000000..14c0cec6 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/Posture.cpp @@ -0,0 +1,362 @@ +/** + * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mv_private.h" +#include "Posture.h" + +#include +#include +#include +#include +#include +#include + +#define POSE_OFFSET_VALUE 20 + +namespace mediavision +{ +namespace inference +{ +Posture::Posture() : + mBvhParser(), + mBvh(), + mPose() +{ + LOGI("ENTER"); + + mMotionToPoseMap.clear(); + mPose.assign(HUMAN_POSE_MAX_LANDMARKS, std::make_pair(false, cv::Point(-1,-1))); + + mPoseParts.assign(HUMAN_POSE_MAX_PARTS, std::make_pair(false, std::vector())); + + LOGI("LEAVE"); +} + +Posture::~Posture() +{ + LOGI("ENTER"); + + std::vector>().swap(mPose); + + LOGI("LEAVE"); +} + + +int Posture::getParts(int parts, + std::vector>& pose, + std::vector>>& posePart) +{ + LOGI("ENTER"); + // head + if (parts & MV_INFERENCE_HUMAN_BODY_PART_HEAD) { + LOGI("HEAD"); + if (pose[0].first == false || pose[1].first == false || pose[2].first == false) { + posePart[0].first = false; + } else { + posePart[0].first = true; + posePart[0].second.push_back(getUnitVectors(pose[0].second, pose[1].second)); + posePart[0].second.push_back(getUnitVectors(pose[1].second, pose[2].second)); + } + } + + // right arm + if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_RIGHT) { + LOGI("ARM-R"); + if (pose[3].first == false || pose[4].first == false || pose[5].first == false) { + posePart[1].first = false; + } else { + posePart[1].first = true; + posePart[1].second.push_back(getUnitVectors(pose[3].second, pose[4].second)); + posePart[1].second.push_back(getUnitVectors(pose[4].second, pose[5].second)); + } + } + + // left arm + if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_LEFT) { + LOGI("ARM-L"); + if (pose[6].first == false || pose[7].first == false || pose[8].first == false) { + posePart[2].first = false; + } else { + posePart[2].first = true; + posePart[2].second.push_back(getUnitVectors(pose[6].second, pose[7].second)); + posePart[2].second.push_back(getUnitVectors(pose[7].second, pose[8].second)); + } + } + + // right leg + if (parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT) { + LOGI("LEG-R"); + if (pose[10].first == false || pose[11].first == false || pose[12].first == false) { + posePart[3].first = false; + } else { + posePart[3].first = true; + posePart[3].second.push_back(getUnitVectors(pose[10].second, pose[11].second)); + posePart[3].second.push_back(getUnitVectors(pose[11].second, pose[12].second)); + } + } + + // left leg + if (parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT) { + LOGI("LEG-L"); + if (pose[13].first == false || pose[14].first == false || pose[15].first == false) { + posePart[4].first = false; + } else { + posePart[4].first = true; + posePart[4].second.push_back(getUnitVectors(pose[13].second, pose[14].second)); + posePart[4].second.push_back(getUnitVectors(pose[14].second, pose[15].second)); + + } + } + + // body + if (parts & MV_INFERENCE_HUMAN_BODY_PART_BODY) { + LOGI("BODY"); + if (pose[2].first == false || pose[9].first == false || + pose[10].first == false || pose[13].first == false) { + posePart[5].first = false; + } else { + posePart[5].first = true; + posePart[5].second.push_back(getUnitVectors(pose[2].second, pose[9].second)); + posePart[5].second.push_back(getUnitVectors(pose[9].second, pose[10].second)); + posePart[5].second.push_back(getUnitVectors(pose[9].second, pose[13].second)); + } + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} + +int Posture::setPoseFromFile(const std::string motionCaptureFilePath, const std::string motionMappingFilePath) +{ + LOGI("ENTER"); + + int ret = MEDIA_VISION_ERROR_NONE; + + // parsing motion capture file + LOGD("%s", 
motionCaptureFilePath.c_str());
+	LOGD("%s", motionMappingFilePath.c_str());
+	ret = mBvhParser.parse(motionCaptureFilePath.c_str(), &mBvh);
+	if (ret != MEDIA_VISION_ERROR_NONE) {
+		LOGE("Fail to parse a file [%s]", motionCaptureFilePath.c_str());
+		return MEDIA_VISION_ERROR_INTERNAL;
+	}
+
+	LOGD("frames: %d", mBvh.num_frames());
+
+	mBvh.recalculate_joints_ltm();
+
+	LOGD("reading motion mapping....");
+	// read motion mapping file
+	std::ifstream fp(motionMappingFilePath.c_str());
+	if (!fp.is_open()) {
+		LOGE("Fail to open %s", motionMappingFilePath.c_str());
+		return MEDIA_VISION_ERROR_INVALID_PATH;
+	}
+
+	std::string line;
+	mMotionToPoseMap.clear();
+	while (std::getline(fp, line)) {
+		if (line.empty())
+			continue;
+
+		LOGD("%s", line.c_str());
+		std::istringstream lineStream(line);
+		std::string token;
+		std::vector<std::string> parsedString;
+		while (getline(lineStream, token, ',')) {
+			parsedString.push_back(token);
+		}
+		LOGD("name: %s, mapping index: %d", parsedString[0].c_str(), std::stoi(parsedString[1]));
+		mMotionToPoseMap.insert(make_pair(parsedString[0], std::stoi(parsedString[1]) - 1));
+	}
+
+	fp.close();
+	LOGD("mapping size is %zu", mMotionToPoseMap.size());
+
+	// convert bvh to pose
+	float pointX, pointY, minX, minY, maxX, maxY;
+	minX = minY = FLT_MAX;
+	maxX = maxY = -FLT_MAX; // lowest possible value, so negative coordinates are handled too
+	for (std::shared_ptr<Joint> joint : mBvh.joints()) {
+		std::map<std::string, int>::iterator it = mMotionToPoseMap.find(std::string(joint->name()));
+		if (it != mMotionToPoseMap.end()) {
+			pointX = joint->pos(0)[0];
+			pointY = joint->pos(0)[1];
+			if (pointX < minX)
+				minX = pointX;
+
+			if (pointY < minY)
+				minY = pointY;
+
+			if (pointX > maxX)
+				maxX = pointX;
+
+			if (pointY > maxY)
+				maxY = pointY;
+
+			mPose[it->second].first = true;
+			mPose[it->second].second = cv::Point(pointX, pointY);
+			LOGD("%d: (%f,%f)", it->second, pointX, pointY);
+		}
+	}
+
+	// add offset to make x > 0 and y > 0
+	int height = (int)maxY - (int)minY + POSE_OFFSET_VALUE;
+	for (std::vector<std::pair<bool, cv::Point>>::iterator iter = mPose.begin();
+		iter != mPose.end(); iter++) {
+		if (iter->first == false)
+			continue;
+
+		iter->second.x -= minX;
+		iter->second.y -= minY;
+
+		iter->second.x += POSE_OFFSET_VALUE;
+		iter->second.y += POSE_OFFSET_VALUE;
+
+		iter->second.y = height - iter->second.y;
+
+		LOGI("(%d, %d)", iter->second.x, iter->second.y);
+	}
+
+	ret = getParts((MV_INFERENCE_HUMAN_BODY_PART_HEAD |
+			MV_INFERENCE_HUMAN_BODY_PART_ARM_RIGHT |
+			MV_INFERENCE_HUMAN_BODY_PART_ARM_LEFT |
+			MV_INFERENCE_HUMAN_BODY_PART_BODY |
+			MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT |
+			MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT),
+			mPose, mPoseParts);
+	if (ret != MEDIA_VISION_ERROR_NONE) {
+		LOGE("Fail to getParts");
+		return ret;
+	}
+
+	LOGI("LEAVE");
+
+	return ret;
+}
+
+cv::Vec2f Posture::getUnitVectors(cv::Point point1, cv::Point point2)
+{
+	LOGI("ENTER");
+
+	cv::Vec2i vec(point1.x - point2.x, point1.y - point2.y);
+	cv::Vec2f unitVec(vec[0] / cv::norm(vec, cv::NORM_L1), vec[1] / cv::norm(vec, cv::NORM_L1));
+
+	LOGI("LEAVE");
+
+	return unitVec;
+}
+
+float Posture::cosineSimilarity(std::vector<cv::Vec2f> vec1, std::vector<cv::Vec2f> vec2, int size)
+{
+	float numer = 0.0f;
+	float denom1 = 0.0f;
+	float denom2 = 0.0f;
+
+	float value = 0.0f;
+
+	for (int k = 0; k < size; ++k) {
+		numer = denom1 = denom2 = 0.0f;
+		for (int dim = 0; dim < 2; ++dim) {
+			numer += (vec1[k][dim] * vec2[k][dim]);
+			denom1 += (vec1[k][dim] * vec1[k][dim]);
+			denom2 += (vec2[k][dim] * vec2[k][dim]);
+		}
+		LOGI("similarity: %f", numer / sqrt(denom1 * denom2));
+		value +=
numer / sqrt( denom1 * denom2); + + } + + return value; +} + +float Posture::getSimilarity(int parts, + std::vector>>& posePart, + std::vector>>& actionPart) +{ + float score = 0.0f; + unsigned int bodyCount = 0; + std::vector index; + + if (parts & MV_INFERENCE_HUMAN_BODY_PART_HEAD) { + index.push_back(0); + } + + if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_RIGHT) { + index.push_back(1); + } + + if (parts & MV_INFERENCE_HUMAN_BODY_PART_ARM_LEFT) { + index.push_back(2); + } + + if (parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT) { + index.push_back(3); + } + + if (parts & MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT) { + index.push_back(4); + } + + if (parts & MV_INFERENCE_HUMAN_BODY_PART_BODY) { + index.push_back(5); + } + + for (std::vector::iterator it = index.begin(); it != index.end(); ++it) { + if (posePart[(*it)].first && actionPart[(*it)].first && + (posePart[(*it)].second.size() == actionPart[(*it)].second.size())) { + score += cosineSimilarity(posePart[(*it)].second, actionPart[(*it)].second, posePart[(*it)].second.size()); + + bodyCount += posePart[(*it)].second.size(); + LOGI("body[%d], score[%f], count[%u]", (*it), score, bodyCount); + } + } + + if (bodyCount > 0) + score /= (float)bodyCount; + + LOGD("score: %1.3f", score); + + return score; +} + +int Posture::compare(int parts, std::vector> action, float* score) +{ + LOGI("ENTER"); + + std::vector>> actionParts; + actionParts.assign(6, std::make_pair(false, std::vector())); + int ret = getParts(parts, action, actionParts); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to getPartse"); + return ret; + } + + *score = getSimilarity(parts, mPoseParts, actionParts); + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; +} + +} +} // namespace diff --git a/mv_machine_learning/mv_inference/inference/src/PreProcess.cpp b/mv_machine_learning/mv_inference/inference/src/PreProcess.cpp new file mode 100644 index 00000000..fa65ced2 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/PreProcess.cpp @@ -0,0 +1,130 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mv_private.h" + +#include +#include +#include +#include +#include +#include "PreProcess.h" + +const int colorConvertTable[][12] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, -1, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_GRAY2BGR565, cv::COLOR_GRAY2RGB, cv::COLOR_GRAY2RGBA }, + { 0, cv::COLOR_YUV2GRAY_I420, -1, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_RGBA2GRAY, cv::COLOR_YUV2RGBA_I420 }, + { 0, cv::COLOR_YUV2GRAY_NV12, 0, -1, 0, 0, 0, 0, 0, 0, cv::COLOR_YUV2RGB_NV12, cv::COLOR_YUV2RGBA_NV12 }, + { 0, cv::COLOR_YUV2GRAY_YV12, 0, 0, -1, 0, 0, 0, 0, 0, cv::COLOR_YUV2RGB_YV12, cv::COLOR_YUV2RGBA_YV12 }, + { 0, cv::COLOR_YUV2GRAY_NV21, 0, 0, 0, -1, 0, 0, 0, 0, cv::COLOR_YUV2RGB_NV21, cv::COLOR_YUV2RGBA_NV21 }, + { 0, cv::COLOR_YUV2GRAY_YUYV, 0, 0, 0, 0, -1, 0, 0, 0, cv::COLOR_YUV2RGB_YUYV, cv::COLOR_YUV2RGBA_YUYV }, + { 0, cv::COLOR_YUV2GRAY_UYVY, 0, 0, 0, 0, 0, -1, 0, 0, cv::COLOR_YUV2BGR_UYVY, cv::COLOR_YUV2BGRA_UYVY }, + { 0, cv::COLOR_YUV2GRAY_Y422, 0, 0, 0, 0, 0, 0, -1, 0, cv::COLOR_YUV2RGB_Y422, cv::COLOR_YUV2RGBA_Y422 }, + { 0, cv::COLOR_BGR5652GRAY, 0, 0, 0, 0, 0, 0, 0, -1, cv::COLOR_BGR5652BGR, cv::COLOR_BGR5652BGRA }, + { 0, cv::COLOR_RGB2GRAY, 0, 0, 0, 0, 0, 0, 0, 0, -1, cv::COLOR_RGB2RGBA }, + { 0, cv::COLOR_RGBA2GRAY, 0, 0, 0, 0, 0, 0, 0, cv::COLOR_BGRA2BGR565, cv::COLOR_RGBA2RGB, -1} +}; + +namespace mediavision +{ +namespace inference +{ + int PreProcess::Resize(cv::Mat& source, cv::Mat& dest, cv::Size size) + { + LOGI("ENTER"); + + try { + cv::resize(source, dest, size); + } catch (cv::Exception& e) { + LOGE("Fail to resize with msg: %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + + int PreProcess::ColorConvert(cv::Mat& source, cv::Mat& dest, int sType, int dType) + { + LOGI("ENTER"); + + auto conversionColor = static_cast(colorConvertTable[sType][dType]); + if (conversionColor == -1) {/* Don't need conversion */ + dest = source; + } else if (conversionColor > 0) { + /* Class for representation the given image as cv::Mat before conversion */ + cv::cvtColor(source, dest, conversionColor); + } else { + LOGE("Fail to ColorConvert"); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int PreProcess::Normalize(cv::Mat& source, cv::Mat& dest, + const std::vector& mean, const std::vector& std) + { + LOGI("ENTER"); + try { + cv::subtract(source, cv::Scalar(mean[0], mean[1], mean[2]), dest); + source = dest; + cv::divide(source, cv::Scalar(std[0], std[1], std[2]), dest); + } catch (cv::Exception& e) { + LOGE("Fail to substract/divide with msg: %s", e.what()); + return MEDIA_VISION_ERROR_INVALID_OPERATION; + } + + LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + + int PreProcess::Run(cv::Mat& source, const int colorSpace, + const int dataType, const LayerInfo& layerInfo, + const Options& options, void* buffer) + { + LOGI("ENTER"); + + // dest is a wrapper of the buffer + cv::Mat dest(cv::Size(layerInfo.GetWidth(), layerInfo.GetHeight()), + dataType, buffer); + + cv::Mat cvSource, cvDest; + // cvSource has new allocation with dest.size() + Resize(source, cvSource, dest.size()); + + // cvDest has new allocation if it's colorSpace is not RGB888 + // cvDest share the data with cvSource it's colorSpace is RGB888 + ColorConvert(cvSource, cvDest, colorSpace, layerInfo.colorSpace); + + cvDest.convertTo(dest, dest.type()); + + if (options.normalization.use) { + Normalize(dest, dest, options.normalization.mean, options.normalization.std); + } + 
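+		/* Note that only the normalization option is consumed here; the
+		 * quantization parameters parsed into Options are not applied by
+		 * Run() in this revision. A hypothetical call, assuming the caller
+		 * owns the destination tensor buffer and an RGB888 source image:
+		 *
+		 *   preProc.Run(cvImage, MEDIA_VISION_COLORSPACE_RGB888, CV_32FC3,
+		 *               layerInfo, options, tensorBuffer.buffer);
+		 */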
+ LOGI("LEAVE"); + + return MEDIA_VISION_ERROR_NONE; + } + +} /* Inference */ +} /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/src/TensorBuffer.cpp b/mv_machine_learning/mv_inference/inference/src/TensorBuffer.cpp new file mode 100644 index 00000000..6e4fc30c --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/TensorBuffer.cpp @@ -0,0 +1,137 @@ +/** + * Copyright (c) 2021 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mv_private.h" +#include "TensorBuffer.h" + +#include + +#include +#include +#include +#include + +namespace mediavision +{ +namespace inference +{ + + bool TensorBuffer::empty() + { + return mTensorBuffer.empty(); + } + + bool TensorBuffer::exist(std::string name) + { + return getTensorBuffer(name) != nullptr; + } + + void TensorBuffer::clear() + { + for (auto& tensorBuffer : mTensorBuffer) { + auto& tBuffer = tensorBuffer.second; + if (tBuffer.owner_is_backend) { + continue; + } + + if (tBuffer.data_type == INFERENCE_TENSOR_DATA_TYPE_FLOAT32) { + delete[] static_cast(tBuffer.buffer); + } else if (tBuffer.data_type == INFERENCE_TENSOR_DATA_TYPE_INT64) { + delete[] static_cast(tBuffer.buffer); + } else if (tBuffer.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT32) { + delete[] static_cast(tBuffer.buffer); + } else if (tBuffer.data_type == INFERENCE_TENSOR_DATA_TYPE_UINT16) { + delete[] static_cast(tBuffer.buffer); + } else { + delete[] static_cast(tBuffer.buffer); + } + } + + LOGI("Tensor(%zu) have been released.", mTensorBuffer.size()); + IETensorBuffer().swap(mTensorBuffer); + } + + size_t TensorBuffer::size() + { + return mTensorBuffer.size(); + } + + IETensorBuffer& TensorBuffer::getAllTensorBuffer() + { + return mTensorBuffer; + } + + inference_engine_tensor_buffer* TensorBuffer::getTensorBuffer(std::string name) + { + if (mTensorBuffer.find(name) == mTensorBuffer.end()){ + return nullptr; + } + + return &mTensorBuffer[name]; + } + + bool TensorBuffer::setTensorBuffer(std::string name, inference_engine_tensor_buffer& buffer) + { + if (name.empty() || + buffer.buffer == nullptr) { + LOGE("Invalid parameters: %s, %p", name.c_str(), buffer.buffer); + return false; + } + + auto ret = mTensorBuffer.insert(std::make_pair(name, buffer)); + if (ret.second == false) { + LOGE("Fail to insert %s with buffer %p", name.c_str(), buffer.buffer); + return false; + } + + return true; + } + + template + T TensorBuffer::getValue(std::string name, int idx) + { + inference_engine_tensor_buffer* tBuffer = + getTensorBuffer(name); + if (tBuffer == nullptr) { + throw std::invalid_argument(name); + } + + switch (tBuffer->data_type) { + case INFERENCE_TENSOR_DATA_TYPE_FLOAT32: + return static_cast(static_cast(tBuffer->buffer)[idx]); + case INFERENCE_TENSOR_DATA_TYPE_INT64: + return static_cast( + static_cast(tBuffer->buffer)[idx]); + case INFERENCE_TENSOR_DATA_TYPE_UINT32: + return static_cast( + static_cast(tBuffer->buffer)[idx]); + case INFERENCE_TENSOR_DATA_TYPE_UINT8: + return 
static_cast( + static_cast(tBuffer->buffer)[idx]); + case INFERENCE_TENSOR_DATA_TYPE_UINT16: + return static_cast( + static_cast(tBuffer->buffer)[idx]); + default: + break; + } + + throw std::invalid_argument("Invalid data type"); + } + + template float TensorBuffer::getValue(std::string, int); + template int TensorBuffer::getValue(std::string, int); +} /* Inference */ +} /* MediaVision */ diff --git a/mv_machine_learning/mv_inference/inference/src/mv_inference.c b/mv_machine_learning/mv_inference/inference/src/mv_inference.c new file mode 100644 index 00000000..454354e8 --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/mv_inference.c @@ -0,0 +1,499 @@ +/** + * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mv_private.h" +#include "mv_inference.h" + +#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT + +/* Include headers of licensed inference module here. */ +//#include "mv_inference_lic.h" + +#else + +/* Include headers of open inference module here. */ +#include "mv_inference_open.h" + +#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ + +/** + * @file mv_inference.c + * @brief This file contains Media Vision inference module. + */ + +int mv_inference_create(mv_inference_h *infer) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_NULL_ARG_CHECK(infer); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT + + //ret = mv_inference_create_lic(infer); + +#else + + ret = mv_inference_create_open(infer); + +#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ + + MEDIA_VISION_FUNCTION_LEAVE(); + return ret; +} + +int mv_inference_destroy(mv_inference_h infer) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(infer); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT + + //ret = mv_inference_destroy_lic(infer); + +#else + + ret = mv_inference_destroy_open(infer); + +#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ + + MEDIA_VISION_FUNCTION_LEAVE(); + return ret; +} + +int mv_inference_configure(mv_inference_h infer, + mv_engine_config_h engine_config) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(infer); + MEDIA_VISION_INSTANCE_CHECK(engine_config); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT + + //ret = mv_inference_configure_lic(infer); + +#else + + ret = mv_inference_configure_engine_open(infer, engine_config); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to configure engine and target"); + return ret; + } + +#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ + + MEDIA_VISION_FUNCTION_LEAVE(); + return ret; +} + +int mv_inference_prepare(mv_inference_h infer) +{ + 
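+	/*
+	 * Preparation runs the whole open-port configuration chain: the model
+	 * files, input tensor info, output layer info and post-process options
+	 * (maximum candidates, threshold) are read from the engine config before
+	 * mv_inference_prepare_open() loads the model. Each step below fails
+	 * fast with the first error it hits.
+	 */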
MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(infer); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h engine_config = mv_inference_get_engine_config(infer); + +#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT + + //ret = mv_inference_prepare_lic(infer); + +#else + + ret = mv_inference_configure_model_open(infer, engine_config); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to configure model"); + return ret; + } + + // input tensor, input layer + ret = mv_inference_configure_input_info_open(infer, engine_config); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to configure input info"); + return ret; + } + + // output layer + ret = mv_inference_configure_output_info_open(infer, engine_config); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to configure output info"); + return ret; + } + + // maximum candidates, threshold + ret = mv_inference_configure_post_process_info_open(infer, engine_config); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to configure post process info"); + return ret; + } + + ret = mv_inference_prepare_open(infer); + +#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ + + MEDIA_VISION_FUNCTION_LEAVE(); + return ret; +} + +int mv_inference_foreach_supported_engine( + mv_inference_h infer, mv_inference_supported_engine_cb callback, + void *user_data) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(infer); + MEDIA_VISION_NULL_ARG_CHECK(callback); + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT + + // ret = mv_inference_foreach_supported_engine_lic(infer, callback, user_data); + +#else + + ret = mv_inference_foreach_supported_engine_open(infer, callback, + user_data); + +#endif + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_inference_image_classify(mv_source_h source, mv_inference_h infer, + mv_rectangle_s *roi, + mv_inference_image_classified_cb classified_cb, + void *user_data) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_image_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_INSTANCE_CHECK(infer); + MEDIA_VISION_NULL_ARG_CHECK(classified_cb); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT + + /* + ret = mv_inference_image_classify_lic(source, infer, classified_cb, user_data); + */ + +#else + + ret = mv_inference_image_classify_open(source, infer, roi, classified_cb, + user_data); + +#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_inference_object_detect(mv_source_h source, mv_inference_h infer, + mv_inference_object_detected_cb detected_cb, + void *user_data) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_image_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_INSTANCE_CHECK(infer); + MEDIA_VISION_NULL_ARG_CHECK(detected_cb); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENSE_PORT + + /* + ret = mv_inference_object_detect_lic(source, infer, classified_cb, user_data); + */ + +#else + + ret = mv_inference_object_detect_open(source, infer, detected_cb, + user_data); + +#endif /* MEDIA_VISION_INFERENCE_LICENSE_PORT */ + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int 
mv_inference_face_detect(mv_source_h source, mv_inference_h infer, + mv_inference_face_detected_cb detected_cb, + void *user_data) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_face_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_INSTANCE_CHECK(infer); + MEDIA_VISION_NULL_ARG_CHECK(detected_cb); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT + /* + ret = mv_inference_face_detect_lic(source, infer, detected_cb, user_data); + */ +#else + + ret = mv_inference_face_detect_open(source, infer, detected_cb, user_data); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; + +#endif +} + +int mv_inference_facial_landmark_detect( + mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + mv_inference_facial_landmark_detected_cb detected_cb, void *user_data) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_face_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_INSTANCE_CHECK(infer); + MEDIA_VISION_NULL_ARG_CHECK(detected_cb); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + +#ifdef MEDIA_VISION_INFERENCE_LICENCE_PORT + /* + ret = mv_inference_facial_landmark_detect_lic(source, infer, detected_cb, user_data); + */ +#else + + ret = mv_inference_facial_landmark_detect_open(source, infer, roi, + detected_cb, user_data); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; + +#endif +} + +int mv_inference_pose_landmark_detect( + mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + mv_inference_pose_landmark_detected_cb detected_cb, void *user_data) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_face_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(source); + MEDIA_VISION_INSTANCE_CHECK(infer); + MEDIA_VISION_NULL_ARG_CHECK(detected_cb); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + + + ret = mv_inference_pose_landmark_detect_open(source, infer, roi, + detected_cb, user_data); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_inference_pose_get_number_of_poses(mv_inference_pose_result_h result, int *number_of_poses) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(result); + + MEDIA_VISION_NULL_ARG_CHECK(number_of_poses); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + + ret = mv_inference_pose_get_number_of_poses_open(result, number_of_poses); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + + +int mv_inference_pose_get_number_of_landmarks(mv_inference_pose_result_h result, int *number_of_landmarks) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(result); + + MEDIA_VISION_NULL_ARG_CHECK(number_of_landmarks); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + + ret = mv_inference_pose_get_number_of_landmarks_open(result, number_of_landmarks); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_inference_pose_get_landmark(mv_inference_pose_result_h result, + int pose_index, int part_index, mv_point_s *location, float *score) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(result); + + MEDIA_VISION_NULL_ARG_CHECK(location); + MEDIA_VISION_NULL_ARG_CHECK(score); + + if (pose_index < 0 || part_index < 0) + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + + 
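+	/* Hypothetical usage sketch built only from the getters in this file
+	 * (error handling omitted for brevity):
+	 *
+	 *   int poses = 0, parts = 0;
+	 *   mv_inference_pose_get_number_of_poses(result, &poses);
+	 *   mv_inference_pose_get_number_of_landmarks(result, &parts);
+	 *   for (int p = 0; p < poses; ++p)
+	 *       for (int k = 0; k < parts; ++k) {
+	 *           mv_point_s location;
+	 *           float score = 0.0f;
+	 *           mv_inference_pose_get_landmark(result, p, k, &location, &score);
+	 *       }
+	 */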
MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + + ret = mv_inference_pose_get_landmark_open(result, pose_index, part_index, location, score); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_inference_pose_get_label(mv_inference_pose_result_h result, int pose_index, int *label) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(result); + + MEDIA_VISION_NULL_ARG_CHECK(label); + + if (pose_index < 0) + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + + ret = mv_inference_pose_get_label_open(result, pose_index, label); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + + +int mv_pose_create(mv_pose_h *pose) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_NULL_ARG_CHECK(pose); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + + ret = mv_pose_create_open(pose); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_pose_destroy(mv_pose_h pose) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(pose); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + + ret = mv_pose_destroy_open(pose); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_pose_set_from_file(mv_pose_h pose, const char *motion_capture_file_path, const char *motion_mapping_file_path) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(pose); + MEDIA_VISION_NULL_ARG_CHECK(motion_capture_file_path); + MEDIA_VISION_NULL_ARG_CHECK(motion_mapping_file_path); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + + ret = mv_pose_set_from_file_open(pose, motion_capture_file_path, motion_mapping_file_path); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} + +int mv_pose_compare(mv_pose_h pose, mv_inference_pose_result_h action, int parts, float *score) +{ + MEDIA_VISION_SUPPORT_CHECK( + __mv_inference_check_system_info_feature_supported()); + MEDIA_VISION_INSTANCE_CHECK(pose); + MEDIA_VISION_INSTANCE_CHECK(action); + MEDIA_VISION_NULL_ARG_CHECK(score); + + MEDIA_VISION_FUNCTION_ENTER(); + + int ret = MEDIA_VISION_ERROR_NONE; + + ret = mv_pose_compare_open(pose, action, parts, score); + + MEDIA_VISION_FUNCTION_LEAVE(); + + return ret; +} diff --git a/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp new file mode 100644 index 00000000..1c4eb7ed --- /dev/null +++ b/mv_machine_learning/mv_inference/inference/src/mv_inference_open.cpp @@ -0,0 +1,1020 @@ +/** + * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#include "mv_private.h"
#include "mv_inference_open.h"

#include "Inference.h"
#include "Posture.h"

#include <new>      // std::nothrow
#include <string>
#include <unistd.h> // access()

using namespace mediavision::inference;

static int check_mv_inference_engine_version(mv_engine_config_h engine_config,
					     bool *is_new_version)
{
	int oldType = 0, newType = 0;

	int ret = mv_engine_config_get_int_attribute(
			engine_config, MV_INFERENCE_TARGET_TYPE, &oldType);
	if (ret != MEDIA_VISION_ERROR_NONE)
		oldType = -1;

	ret = mv_engine_config_get_int_attribute(
			engine_config, MV_INFERENCE_TARGET_DEVICE_TYPE, &newType);
	if (ret != MEDIA_VISION_ERROR_NONE)
		newType = -1;

	// At least one of the two target device types from the
	// media-vision-config.json file should be the CPU device.
	if (oldType == -1 && newType == -1)
		return MEDIA_VISION_ERROR_INVALID_PARAMETER;

	// If the values of both types were changed then return an error;
	// only one of the two types should be used.
	if (oldType != MV_INFERENCE_TARGET_CPU &&
		newType != MV_INFERENCE_TARGET_DEVICE_CPU) {
		LOGE("Please use only one of the two device types below.");
		LOGE("MV_INFERENCE_TARGET_TYPE(deprecated) or MV_INFERENCE_TARGET_DEVICE_TYPE(recommended).");
		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
	}

	LOGI("oldType = %d, newType = %d", oldType, newType);

	// If only the old type's default value was changed then use the old type.
	// Otherwise, use the new type in the following cases:
	// - neither type's default value was changed.
	//   (oldType == MV_INFERENCE_TARGET_CPU && newType == MV_INFERENCE_TARGET_DEVICE_CPU)
	// - only the new type's default value was changed.
	//   (oldType == MV_INFERENCE_TARGET_CPU && (newType != -1 && newType != MV_INFERENCE_TARGET_DEVICE_CPU))
	if ((oldType != -1 && oldType != MV_INFERENCE_TARGET_CPU) &&
		newType == MV_INFERENCE_TARGET_DEVICE_CPU)
		*is_new_version = false;
	else
		*is_new_version = true;

	return MEDIA_VISION_ERROR_NONE;
}

mv_engine_config_h mv_inference_get_engine_config(mv_inference_h infer)
{
	Inference *pInfer = static_cast<Inference *>(infer);

	return pInfer->GetEngineConfig();
}

int mv_inference_create_open(mv_inference_h *infer)
{
	if (infer == NULL) {
		LOGE("Handle can't be created because handle pointer is NULL");
		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
	}

	(*infer) = static_cast<mv_inference_h>(new (std::nothrow) Inference());

	if (*infer == NULL) {
		LOGE("Failed to create inference handle");
		return MEDIA_VISION_ERROR_OUT_OF_MEMORY;
	}

	LOGD("Inference handle [%p] has been created", *infer);

	return MEDIA_VISION_ERROR_NONE;
}

int mv_inference_destroy_open(mv_inference_h infer)
{
	if (!infer) {
		LOGE("Handle can't be destroyed because handle is NULL");
		return MEDIA_VISION_ERROR_INVALID_PARAMETER;
	}

	LOGD("Destroying inference handle [%p]", infer);
	delete static_cast<Inference *>(infer);
	LOGD("Inference handle has been destroyed");

	return MEDIA_VISION_ERROR_NONE;
}

int mv_inference_configure_model_open(mv_inference_h infer,
				      mv_engine_config_h engine_config)
{
	LOGI("ENTER");

	Inference *pInfer = static_cast<Inference *>(infer);

	int ret = MEDIA_VISION_ERROR_NONE;

	char *modelConfigFilePath = NULL;
	char *modelWeightFilePath = NULL;
	char *modelUserFilePath = NULL;
	double modelMeanValue = 0.0;
	int backendType = 0;
	size_t userFileLength = 0;

	// TODO: a temporary variable; it should be removed later.
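	/* Editor's note: a sketch of the meta-file derivation done below with
	 * a hypothetical weight path; the weight file's extension is simply
	 * swapped for ".json" to locate the metadata file next to it:
	 *
	 *   modelWeightFilePath = "/usr/share/capi-media-vision/models/ic.tflite"
	 *   metaFilePath        = "/usr/share/capi-media-vision/models/ic.json"
	 *
	 *   std::string meta = std::string(weightPath).substr(
	 *           0, std::string(weightPath).find_last_of('.')) + ".json";
	 */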
+ std::string metaFilePath; + + ret = mv_engine_config_get_string_attribute( + engine_config, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, + &modelConfigFilePath); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get model configuration file path"); + goto _ERROR_; + } + + ret = mv_engine_config_get_string_attribute( + engine_config, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + &modelWeightFilePath); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get model weight file path"); + goto _ERROR_; + } + + ret = mv_engine_config_get_string_attribute( + engine_config, MV_INFERENCE_MODEL_USER_FILE_PATH, + &modelUserFilePath); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get model user file path"); + goto _ERROR_; + } + + ret = mv_engine_config_get_double_attribute( + engine_config, MV_INFERENCE_MODEL_MEAN_VALUE, &modelMeanValue); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get model mean value"); + goto _ERROR_; + } + + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_BACKEND_TYPE, &backendType); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference backend type"); + goto _ERROR_; + } + + if (access(modelWeightFilePath, F_OK)) { + LOGE("weightFilePath in [%s] ", modelWeightFilePath); + ret = MEDIA_VISION_ERROR_INVALID_PATH; + goto _ERROR_; + } + + if ((backendType > MV_INFERENCE_BACKEND_NONE && + backendType < MV_INFERENCE_BACKEND_MAX) && + (backendType != MV_INFERENCE_BACKEND_TFLITE) && + (backendType != MV_INFERENCE_BACKEND_ARMNN) && + (backendType == MV_INFERENCE_BACKEND_MLAPI && + (pInfer->GetTargetType() & MV_INFERENCE_TARGET_DEVICE_CUSTOM)) && + (backendType != MV_INFERENCE_BACKEND_ONE)) { + if (access(modelConfigFilePath, F_OK)) { + LOGE("modelConfigFilePath in [%s] ", modelConfigFilePath); + ret = MEDIA_VISION_ERROR_INVALID_PATH; + goto _ERROR_; + } + } + + userFileLength = strlen(modelUserFilePath); + if (userFileLength > 0 && access(modelUserFilePath, F_OK)) { + LOGE("categoryFilePath in [%s] ", modelUserFilePath); + ret = MEDIA_VISION_ERROR_INVALID_PATH; + goto _ERROR_; + } + + pInfer->ConfigureModelFiles(std::string(modelConfigFilePath), + std::string(modelWeightFilePath), + std::string(modelUserFilePath)); + /* FIXME + * temporal code lines to get a metafile, which has the same name + * with modelsWeightFilePath except the extension. + * Later, it should get a metafilename and the below lines should be + * removed. + */ + metaFilePath = std::string(modelWeightFilePath).substr(0, + std::string(modelWeightFilePath).find_last_of('.')) + ".json"; + LOGI("metaFilePath: %s", metaFilePath.c_str()); + pInfer->ParseMetadata(metaFilePath); + +_ERROR_: + if (modelConfigFilePath) + free(modelConfigFilePath); + + if (modelWeightFilePath) + free(modelWeightFilePath); + + if (modelUserFilePath) + free(modelUserFilePath); + + LOGI("LEAVE"); + + return ret; +} + +int mv_inference_configure_tensor_info_open(mv_inference_h infer, + mv_engine_config_h engine_config) +{ + LOGI("ENTER"); + + Inference *pInfer = static_cast(infer); + + int ret = MEDIA_VISION_ERROR_NONE; + + int tensorWidth, tensorHeight, tensorDim, tensorCh; + double meanValue, stdValue; + + // This should be one. 
only one batch is supported + tensorDim = 1; + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_INPUT_TENSOR_WIDTH, &tensorWidth); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get tensor width"); + goto _ERROR_; + } + + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_INPUT_TENSOR_HEIGHT, &tensorHeight); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get tensor height"); + goto _ERROR_; + } + + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_INPUT_TENSOR_CHANNELS, &tensorCh); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get tensor channels"); + goto _ERROR_; + } + + ret = mv_engine_config_get_double_attribute( + engine_config, MV_INFERENCE_MODEL_MEAN_VALUE, &meanValue); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get meanValue"); + goto _ERROR_; + } + + ret = mv_engine_config_get_double_attribute( + engine_config, MV_INFERENCE_MODEL_STD_VALUE, &stdValue); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get stdValue"); + goto _ERROR_; + } + + pInfer->ConfigureTensorInfo(tensorWidth, tensorHeight, tensorDim, tensorCh, + stdValue, meanValue); + +_ERROR_: + + LOGI("LEAVE"); + + return ret; +} + +int mv_inference_configure_input_info_open(mv_inference_h infer, + mv_engine_config_h engine_config) +{ + LOGI("ENTER"); + + Inference *pInfer = static_cast(infer); + + int ret = MEDIA_VISION_ERROR_NONE; + + int tensorWidth, tensorHeight, tensorDim, tensorCh; + double meanValue, stdValue; + char *node_name = NULL; + int dataType = 0; + + // This should be one. only one batch is supported + tensorDim = 1; + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_INPUT_TENSOR_WIDTH, &tensorWidth); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get tensor width"); + goto _ERROR_; + } + + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_INPUT_TENSOR_HEIGHT, &tensorHeight); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get tensor height"); + goto _ERROR_; + } + + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_INPUT_TENSOR_CHANNELS, &tensorCh); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get tensor channels"); + goto _ERROR_; + } + + ret = mv_engine_config_get_double_attribute( + engine_config, MV_INFERENCE_MODEL_MEAN_VALUE, &meanValue); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get meanValue"); + goto _ERROR_; + } + + ret = mv_engine_config_get_double_attribute( + engine_config, MV_INFERENCE_MODEL_STD_VALUE, &stdValue); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get stdValue"); + goto _ERROR_; + } + + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_INPUT_DATA_TYPE, &dataType); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get a input tensor data type"); + goto _ERROR_; + } + + ret = mv_engine_config_get_string_attribute( + engine_config, MV_INFERENCE_INPUT_NODE_NAME, &node_name); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get tensor width"); + goto _ERROR_; + } + + pInfer->ConfigureInputInfo( + tensorWidth, tensorHeight, tensorDim, tensorCh, stdValue, meanValue, + dataType, std::vector(1, std::string(node_name))); + +_ERROR_: + + if (node_name) { + free(node_name); + node_name = NULL; + } + + LOGI("LEAVE"); + + return ret; +} + +int mv_inference_configure_engine_open(mv_inference_h infer, + mv_engine_config_h engine_config) +{ + LOGI("ENTER"); + + Inference *pInfer = static_cast(infer); + int backendType = 0; + int 
targetTypes = 0; + int ret = MEDIA_VISION_ERROR_NONE; + + pInfer->SetEngineConfig(engine_config); + + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_BACKEND_TYPE, &backendType); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference backend type"); + goto _ERROR_; + } + + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_TARGET_DEVICE_TYPE, &targetTypes); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference target type"); + goto _ERROR_; + } + + ret = pInfer->ConfigureBackendType( + (mv_inference_backend_type_e) backendType); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to configure a backend type."); + goto _ERROR_; + } + + bool is_new_version; + + // Check if new inference engine framework or old one. + // new inference engine framework has different mv_inference_target_type_e enumeration values + // to support multiple inference target devices. So in case of old version, + // enumeration value given by user should be converted to new value, which + // will be done at ConfigureTargetTypes callback internally. + // Ps. this function will be dropped with deprecated code version-after-next of Tizen. + ret = check_mv_inference_engine_version(engine_config, &is_new_version); + if (ret != MEDIA_VISION_ERROR_NONE) + goto _ERROR_; + + // Create a inference-engine-common class object and load its corresponding library. + // Ps. Inference engine gets a capability from a given backend by Bind call + // so access to mBackendCapacity should be done after Bind. + ret = pInfer->Bind(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to bind a backend engine."); + } + + if (is_new_version) { + // Use new type. + if (pInfer->ConfigureTargetDevices(targetTypes) != + MEDIA_VISION_ERROR_NONE) { + LOGE("Tried to configure invalid target types."); + goto _ERROR_; + } + } else { + // Convert old type to new one and then use it. 
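	/* Editor's note: an illustrative summary of how
	 * check_mv_inference_engine_version() resolves the two target-type
	 * attributes; the value combinations are examples, not an exhaustive
	 * specification:
	 *
	 *   old TARGET_TYPE          new TARGET_DEVICE_TYPE      result
	 *   default (CPU)            default (DEVICE_CPU)        new path
	 *   default (CPU)            changed (e.g. DEVICE_GPU)   new path
	 *   changed (e.g. GPU)       default (DEVICE_CPU)        old path
	 *   changed                  changed                     error
	 *
	 * New code should therefore set only the recommended attribute:
	 *
	 *   mv_engine_config_set_int_attribute(engine_config,
	 *                                      MV_INFERENCE_TARGET_DEVICE_TYPE,
	 *                                      MV_INFERENCE_TARGET_DEVICE_CPU);
	 */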
+ if (pInfer->ConfigureTargetTypes(targetTypes) != + MEDIA_VISION_ERROR_NONE) { + LOGE("Tried to configure invalid target types."); + goto _ERROR_; + } + } + + LOGI("LEAVE"); +_ERROR_: + return ret; +} + +int mv_inference_configure_output_open(mv_inference_h infer, + mv_engine_config_h engine_config) +{ + LOGI("ENTER"); + + Inference *pInfer = static_cast(infer); + + int maxOutput = 0; + int ret = MEDIA_VISION_ERROR_NONE; + + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_OUTPUT_MAX_NUMBER, &maxOutput); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference output maximum numbers"); + goto _ERROR_; + } + + pInfer->ConfigureOutput(maxOutput); + + LOGI("LEAVE"); +_ERROR_: + return ret; +} + +int mv_inference_configure_confidence_threshold_open( + mv_inference_h infer, mv_engine_config_h engine_config) +{ + LOGI("ENTER"); + + Inference *pInfer = static_cast(infer); + + double threshold = 0; + int ret = MEDIA_VISION_ERROR_NONE; + + ret = mv_engine_config_get_double_attribute( + engine_config, MV_INFERENCE_CONFIDENCE_THRESHOLD, &threshold); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference confidence threshold value"); + goto _ERROR_; + } + + pInfer->ConfigureThreshold(threshold); + + LOGI("LEAVE"); +_ERROR_: + return ret; +} + +int mv_inference_configure_post_process_info_open( + mv_inference_h infer, mv_engine_config_h engine_config) +{ + LOGI("ENTER"); + + Inference *pInfer = static_cast(infer); + + int maxOutput = 0; + double threshold = 0; + int ret = MEDIA_VISION_ERROR_NONE; + + ret = mv_engine_config_get_int_attribute( + engine_config, MV_INFERENCE_OUTPUT_MAX_NUMBER, &maxOutput); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference output maximum numbers"); + goto _ERROR_; + } + + pInfer->ConfigureOutput(maxOutput); + + ret = mv_engine_config_get_double_attribute( + engine_config, MV_INFERENCE_CONFIDENCE_THRESHOLD, &threshold); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference confidence threshold value"); + goto _ERROR_; + } + + pInfer->ConfigureThreshold(threshold); + + LOGI("LEAVE"); +_ERROR_: + return ret; +} + +int mv_inference_configure_output_info_open(mv_inference_h infer, + mv_engine_config_h engine_config) +{ + LOGI("ENTER"); + + Inference *pInfer = static_cast(infer); + + int ret = MEDIA_VISION_ERROR_NONE; + int idx = 0; + char **node_names = NULL; + int size = 0; + std::vector names; + ret = mv_engine_config_get_array_string_attribute( + engine_config, MV_INFERENCE_OUTPUT_NODE_NAMES, &node_names, &size); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get _output_node_names"); + goto _ERROR_; + } + + for (idx = 0; idx < size; ++idx) + names.push_back(std::string(node_names[idx])); + + pInfer->ConfigureOutputInfo(names); + +_ERROR_: + + if (node_names) { + for (idx = 0; idx < size; ++idx) { + free(node_names[idx]); + } + free(node_names); + node_names = NULL; + } + + LOGI("LEAVE"); + + return ret; +} + +int mv_inference_prepare_open(mv_inference_h infer) +{ + LOGI("ENTER"); + + Inference *pInfer = static_cast(infer); + + int ret = MEDIA_VISION_ERROR_NONE; + + // Pass parameters needed to load model files to a backend engine. + ret = pInfer->Prepare(); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to prepare inference"); + return ret; + } + + // Request to load model files to a backend engine. 
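	/* Editor's note: from the application's side, the sequence that ends
	 * up in Prepare()/Load() below is roughly the following; the public
	 * wrapper names are assumed from the C API exercised by the test
	 * suite, and error handling is elided:
	 *
	 *   mv_inference_h infer;
	 *   mv_inference_create(&infer);
	 *   mv_inference_configure(infer, engine_cfg); // model, engine, in/out info
	 *   mv_inference_prepare(infer);               // Prepare() + Load() here
	 *   ...run detections...
	 *   mv_inference_destroy(infer);
	 */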
+ ret = pInfer->Load(); + if (ret != MEDIA_VISION_ERROR_NONE) + LOGE("Fail to load model files."); + + LOGI("LEAVE"); + + return ret; +} + +int mv_inference_foreach_supported_engine_open( + mv_inference_h infer, mv_inference_supported_engine_cb callback, + void *user_data) +{ + LOGI("ENTER"); + + Inference *pInfer = static_cast(infer); + + int ret = MEDIA_VISION_ERROR_NONE; + + //bool isSupported = false; + //char str[1024] = {'\0'}; + std::pair backend; + for (int i = 0; i < MV_INFERENCE_BACKEND_MAX; ++i) { + backend = pInfer->GetSupportedInferenceBackend(i); + callback((backend.first).c_str(), backend.second, user_data); + } + + LOGI("LEAVE"); + + return ret; +} + +int mv_inference_image_classify_open( + mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + mv_inference_image_classified_cb classified_cb, void *user_data) +{ + Inference *pInfer = static_cast(infer); + + int ret = MEDIA_VISION_ERROR_NONE; + int numberOfOutputs = 0; + std::vector sources; + std::vector rects; + + sources.push_back(source); + + if (roi != NULL) + rects.push_back(*roi); + + ret = pInfer->Run(sources, rects); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to run inference"); + return ret; + } + + ImageClassificationResults classificationResults; + + ret = pInfer->GetClassficationResults(&classificationResults); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference results"); + return ret; + } + + numberOfOutputs = classificationResults.number_of_classes; + + int *indices = classificationResults.indices.data(); + float *confidences = classificationResults.confidences.data(); + static const int START_CLASS_NUMBER = 10; + static std::vector names(START_CLASS_NUMBER); + + if (numberOfOutputs > START_CLASS_NUMBER) + names.resize(numberOfOutputs); + + LOGE("mv_inference_open: number_of_classes: %d\n", + classificationResults.number_of_classes); + + for (int n = 0; n < numberOfOutputs; ++n) { + LOGE("names: %s", classificationResults.names[n].c_str()); + names[n] = classificationResults.names[n].c_str(); + } + + classified_cb(source, numberOfOutputs, indices, names.data(), confidences, + user_data); + + return ret; +} + +int mv_inference_object_detect_open(mv_source_h source, mv_inference_h infer, + mv_inference_object_detected_cb detected_cb, + void *user_data) +{ + Inference *pInfer = static_cast(infer); + + int ret = MEDIA_VISION_ERROR_NONE; + int numberOfOutputs = 0; + std::vector sources; + std::vector rects; + + sources.push_back(source); + + ret = pInfer->Run(sources, rects); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to run inference"); + return ret; + } + + ObjectDetectionResults objectDetectionResults; + ret = pInfer->GetObjectDetectionResults(&objectDetectionResults); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference results"); + return ret; + } + + numberOfOutputs = objectDetectionResults.number_of_objects; + + int *indices = objectDetectionResults.indices.data(); + float *confidences = objectDetectionResults.confidences.data(); + static const int START_OBJECT_NUMBER = 20; + static std::vector names(START_OBJECT_NUMBER); + static std::vector locations(START_OBJECT_NUMBER); + + if (numberOfOutputs > START_OBJECT_NUMBER) { + names.resize(numberOfOutputs); + locations.resize(numberOfOutputs); + } + + for (int n = 0; n < numberOfOutputs; ++n) { + LOGE("names: %s", objectDetectionResults.names[n].c_str()); + names[n] = objectDetectionResults.names[n].c_str(); + + locations[n].point.x = objectDetectionResults.locations[n].x; + locations[n].point.y = 
objectDetectionResults.locations[n].y; + locations[n].width = objectDetectionResults.locations[n].width; + locations[n].height = objectDetectionResults.locations[n].height; + } + + detected_cb(source, numberOfOutputs, indices, names.data(), confidences, + locations.data(), user_data); + + return ret; +} + +int mv_inference_face_detect_open(mv_source_h source, mv_inference_h infer, + mv_inference_face_detected_cb detected_cb, + void *user_data) +{ + Inference *pInfer = static_cast(infer); + + int ret = MEDIA_VISION_ERROR_NONE; + int numberOfOutputs = 0; + std::vector sources; + std::vector rects; + + sources.push_back(source); + + ret = pInfer->Run(sources, rects); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to run inference"); + return ret; + } + + FaceDetectionResults faceDetectionResults; + ret = pInfer->GetFaceDetectionResults(&faceDetectionResults); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference results"); + return ret; + } + + numberOfOutputs = faceDetectionResults.number_of_faces; + + float *confidences = faceDetectionResults.confidences.data(); + std::vector locations(numberOfOutputs); + + for (int n = 0; n < numberOfOutputs; ++n) { + locations[n].point.x = faceDetectionResults.locations[n].x; + locations[n].point.y = faceDetectionResults.locations[n].y; + locations[n].width = faceDetectionResults.locations[n].width; + locations[n].height = faceDetectionResults.locations[n].height; + } + + detected_cb(source, numberOfOutputs, confidences, locations.data(), + user_data); + + return ret; +} + +int mv_inference_facial_landmark_detect_open( + mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + mv_inference_facial_landmark_detected_cb detected_cb, void *user_data) +{ + Inference *pInfer = static_cast(infer); + + int ret = MEDIA_VISION_ERROR_NONE; + int numberOfLandmarks = 0; + std::vector sources; + std::vector rects; + + sources.push_back(source); + + if (roi != NULL) + rects.push_back(*roi); + + ret = pInfer->Run(sources, rects); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to run inference"); + return ret; + } + + FacialLandMarkDetectionResults facialLandMarkDetectionResults; + ret = pInfer->GetFacialLandMarkDetectionResults( + &facialLandMarkDetectionResults); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference results"); + return ret; + } + + numberOfLandmarks = facialLandMarkDetectionResults.number_of_landmarks; + + std::vector locations(numberOfLandmarks); + + for (int n = 0; n < numberOfLandmarks; ++n) { + locations[n].x = facialLandMarkDetectionResults.locations[n].x; + locations[n].y = facialLandMarkDetectionResults.locations[n].y; + } + + detected_cb(source, numberOfLandmarks, locations.data(), user_data); + + return ret; +} + +int mv_inference_pose_landmark_detect_open( + mv_source_h source, mv_inference_h infer, mv_rectangle_s *roi, + mv_inference_pose_landmark_detected_cb detected_cb, void *user_data) +{ + Inference *pInfer = static_cast(infer); + + int ret = MEDIA_VISION_ERROR_NONE; + std::vector sources; + std::vector rects; + + unsigned int width, height; + ret = mv_source_get_width(source, &width); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get width"); + return ret; + } + + ret = mv_source_get_height(source, &height); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get height"); + return ret; + } + + sources.push_back(source); + + if (roi != NULL) + rects.push_back(*roi); + + ret = pInfer->Run(sources, rects); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to run 
inference"); + return ret; + } + + mv_inference_pose_result_h result = NULL; + ret = pInfer->GetPoseLandmarkDetectionResults( + &result, width, height); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to get inference results"); + return ret; + } + + mv_inference_pose_s *tmp = static_cast(result); + for (int pose = 0; pose < tmp->number_of_poses; ++pose) { + for (int index = 0; index < tmp->number_of_landmarks_per_pose; ++index) { + LOGI("PoseIdx[%2d]: x[%d], y[%d], score[%.3f]", index, + tmp->landmarks[pose][index].point.x, + tmp->landmarks[pose][index].point.y, + tmp->landmarks[pose][index].score); + } + } + + detected_cb(source, result, user_data); + + return ret; +} + +int mv_inference_pose_get_number_of_poses_open( + mv_inference_pose_result_h result, + int *number_of_poses) +{ + mv_inference_pose_s *handle = static_cast(result); + + *number_of_poses = handle->number_of_poses; + + LOGI("%d", *number_of_poses); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_inference_pose_get_number_of_landmarks_open( + mv_inference_pose_result_h result, + int *number_of_landmarks) +{ + mv_inference_pose_s *handle = static_cast(result); + + *number_of_landmarks = handle->number_of_landmarks_per_pose; + + LOGI("%d", *number_of_landmarks); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_inference_pose_get_landmark_open( + mv_inference_pose_result_h result, + int pose_index, + int part_index, + mv_point_s *location, + float *score) +{ + mv_inference_pose_s *handle = static_cast(result); + + if (pose_index < 0 || pose_index >= handle->number_of_poses) + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + + if (part_index < 0 || part_index >= handle->number_of_landmarks_per_pose) + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + + *location = handle->landmarks[pose_index][part_index].point; + + *score = handle->landmarks[pose_index][part_index].score; + + LOGI("[%d]:(%dx%d) - %.4f", pose_index, location->x, location->y, *score); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_inference_pose_get_label_open( + mv_inference_pose_result_h result, + int pose_index, + int *label) +{ + mv_inference_pose_s *handle = static_cast(result); + + if (pose_index < 0 || pose_index >= handle->number_of_poses) + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + + *label = handle->landmarks[pose_index][0].label; + + LOGI("[%d]: label(%d)", pose_index, *label); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_create_open(mv_pose_h *pose) +{ + if (pose == NULL) { + LOGE("Handle can't be created because handle pointer is NULL"); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + (*pose) = static_cast(new (std::nothrow) Posture); + + if (*pose == NULL) { + LOGE("Failed to create pose handle"); + return MEDIA_VISION_ERROR_OUT_OF_MEMORY; + } + + LOGD("Inference handle [%p] has been created", *pose); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_destroy_open(mv_pose_h pose) +{ + if (!pose) { + LOGE("Hand can't be destroyed because handle is NULL"); + return MEDIA_VISION_ERROR_INVALID_PARAMETER; + } + + LOGD("Destroy pose handle [%p]", pose); + delete static_cast(pose); + LOGD("Pose handle has been destroyed"); + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_set_from_file_open(mv_pose_h pose, + const char *motionCaptureFilePath, + const char *motionMappingFilePath) +{ + Posture *pPose = static_cast(pose); + + int ret = MEDIA_VISION_ERROR_NONE; + + // check file + if (access(motionCaptureFilePath, F_OK) || access(motionMappingFilePath, F_OK)) { + LOGE("Invalid Motion Capture file path [%s]", 
motionCaptureFilePath); + LOGE("Invalid Motion Mapping file path [%s]", motionMappingFilePath); + + return MEDIA_VISION_ERROR_INVALID_PATH; + } + + ret = pPose->setPoseFromFile(std::string(motionCaptureFilePath), + std::string(motionMappingFilePath)); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to setPoseFromFile"); + return ret; + } + + return MEDIA_VISION_ERROR_NONE; +} + +int mv_pose_compare_open(mv_pose_h pose, mv_inference_pose_result_h action, int parts, float *score) +{ + Posture *pPose = static_cast(pose); + + int ret = MEDIA_VISION_ERROR_NONE; + + std::vector> actionParts; + + mv_inference_pose_s *pAction = static_cast(action); + + for (int k = 0; k < HUMAN_POSE_MAX_LANDMARKS; ++k) { + if (pAction->landmarks[0][k].point.x == -1 || pAction->landmarks[0][k].point.y == -1) { + actionParts.push_back(std::make_pair(false, cv::Point(-1,-1))); + continue; + } + + actionParts.push_back(std::make_pair(true, cv::Point(pAction->landmarks[0][k].point.x, + pAction->landmarks[0][k].point.y))); + + } + + ret = pPose->compare(parts, actionParts, score); + if (ret != MEDIA_VISION_ERROR_NONE) { + LOGE("Fail to compare"); + return ret; + } + + LOGD("score: %1.4f", *score); + + return MEDIA_VISION_ERROR_NONE; +} diff --git a/test/testsuites/CMakeLists.txt b/test/testsuites/CMakeLists.txt index 389e6118..8cf1dbe7 100644 --- a/test/testsuites/CMakeLists.txt +++ b/test/testsuites/CMakeLists.txt @@ -10,4 +10,4 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/barcode) add_subdirectory(${PROJECT_SOURCE_DIR}/face) add_subdirectory(${PROJECT_SOURCE_DIR}/image) add_subdirectory(${PROJECT_SOURCE_DIR}/surveillance) -add_subdirectory(${PROJECT_SOURCE_DIR}/inference) +add_subdirectory(${PROJECT_SOURCE_DIR}/machine_learning) diff --git a/test/testsuites/inference/CMakeLists.txt b/test/testsuites/inference/CMakeLists.txt deleted file mode 100644 index dcd1434e..00000000 --- a/test/testsuites/inference/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -project(mv_infer_test_suite) -cmake_minimum_required(VERSION 2.6) - -set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DEBUG _DEBUG) - -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR}) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR}) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) - -include_directories(${PROJECT_SOURCE_DIR}) -include_directories(${MV_CAPI_MEDIA_VISION_INC_DIR}) -include_directories(${INC_IMAGE_HELPER}) -include_directories(${INC_VIDEO_HELPER}) -include_directories(${INC_TS_COMMON}) - -MESSAGE("TESTSUITE: ${MV_CAPI_MEDIA_VISION_INC_DIR}") - -file(GLOB MV_INFER_TEST_SUITE_INC_LIST "${PROJECT_SOURCE_DIR}/*.h") -file(GLOB MV_INFER_TEST_SUITE_SRC_LIST "${PROJECT_SOURCE_DIR}/*.c") - -add_executable(${PROJECT_NAME} - ${MV_INFER_TEST_SUITE_INC_LIST} - ${MV_INFER_TEST_SUITE_SRC_LIST} - ${MV_CAPI_MEDIA_VISION_INC_LIST}) - -target_link_libraries(${PROJECT_NAME} ${MV_INFERENCE_LIB_NAME} - mv_image_helper - mv_video_helper - mv_testsuite_common) - -install(TARGETS ${PROJECT_NAME} DESTINATION ${testbin_dir}) diff --git a/test/testsuites/inference/inference_test_suite.c b/test/testsuites/inference/inference_test_suite.c deleted file mode 100644 index bfeabd4e..00000000 --- a/test/testsuites/inference/inference_test_suite.c +++ /dev/null @@ -1,3310 +0,0 @@ -/** - * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -#define ARRAY_SIZE(x) (sizeof((x)) / sizeof((x)[0])) -#define FILE_PATH_SIZE 1024 - -//Image Classification -#define IC_LABEL_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_label.txt" -#define IC_TFLITE_WEIGHT_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite" -#define IC_Q_LABEL_PATH \ - "/usr/share/capi-media-vision/models/IC_Q/tflite/ic_label.txt" -#define IC_Q_TFLITE_WEIGHT_PATH \ - "/usr/share/capi-media-vision/models/IC_Q/tflite/ic_tflite_model.tflite" - -/* - * Hosted models - */ -#define IC_LABEL_MOBILENET_V1_224_PATH\ - "/usr/share/capi-media-vision/models/IC/tflite/ic_mobilenet_v1_label.txt" -#define IC_TFLITE_WEIGHT_MOBILENET_V1_224_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_mobilenet_v1_224x224.tflite" -#define IC_LABEL_MOBILENET_V2_224_PATH\ - "/usr/share/capi-media-vision/models/IC/tflite/ic_mobilenet_v2_label.txt" -#define IC_TFLITE_WEIGHT_MOBILENET_V2_224_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_mobilenet_v2_224x224.tflite" -#define IC_LABEL_DENSENET_224_PATH\ - "/usr/share/capi-media-vision/models/IC/tflite/ic_densenet_label.txt" -#define IC_TFLITE_WEIGHT_DENSENET_224_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_densenet_224x224.tflite" -#define IC_LABEL_INCEPTION_RESENET_299_PATH\ - "/usr/share/capi-media-vision/models/IC/tflite/ic_densenet_label.txt" -#define IC_TFLITE_WEIGHT_INCEPTION_RESENET_299_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_inception_resnet_v2_299x299.tflite" -#define IC_LABEL_INCEPTION_V3_299_PATH\ - "/usr/share/capi-media-vision/models/IC/tflite/ic_inception_v3_label.txt" -#define IC_TFLITE_WEIGHT_INCEPTION_V3_299_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_inception_v3_299x299.tflite" -#define IC_LABEL_INCEPTION_V4_299_PATH\ - "/usr/share/capi-media-vision/models/IC/tflite/ic_inception_v4_label.txt" -#define IC_TFLITE_WEIGHT_INCEPTION_V4_299_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_inception_v4_299x299.tflite" -#define IC_LABEL_NASNET_224_PATH\ - "/usr/share/capi-media-vision/models/IC/tflite/ic_nasnet_label.txt" -#define IC_TFLITE_WEIGHT_NASNET_224_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_nasnet_224x224.tflite" -#define IC_LABEL_MNASNET_224_PATH\ - "/usr/share/capi-media-vision/models/IC/tflite/ic_mnasnet_label.txt" -#define IC_TFLITE_WEIGHT_MNASNET_224_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_mnasnet_224x224.tflite" -#define IC_LABEL_RESNET_V2_299_PATH\ - "/usr/share/capi-media-vision/models/IC/tflite/ic_resnet_v2_label.txt" -#define IC_TFLITE_WEIGHT_RESNET_V2_299_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_resnet_v2_299x299.tflite" -#define IC_LABEL_SQUEEZENET_224_PATH\ - "/usr/share/capi-media-vision/models/IC/tflite/ic_squeezenet_label.txt" -#define IC_TFLITE_WEIGHT_SQUEEZENET_224_PATH \ - "/usr/share/capi-media-vision/models/IC/tflite/ic_squeezenet_224x224.tflite" - -#define IC_VIVANTE_LABEL_PATH \ - 
"/usr/share/capi-media-vision/models/IC/vivante/ic_label.txt" -#define IC_VIVANTE_WEIGHT_PATH \ - "/usr/share/capi-media-vision/models/IC/vivante/ic_vivante_model.nb" -#define IC_VIVANTE_CONFIG_PATH \ - "/usr/share/capi-media-vision/models/IC/vivante/ic_vivante_model.so" - -#define IC_OPENCV_LABEL_CAFFE_PATH \ - "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_label_squeezenet.txt" -#define IC_OPENCV_WEIGHT_CAFFE_PATH \ - "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel" -#define IC_OPENCV_CONFIG_CAFFE_PATH \ - "/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt" - -//Object Detection -#define OD_LABEL_PATH \ - "/usr/share/capi-media-vision/models/OD/tflite/od_label.txt" -#define OD_TFLITE_WEIGHT_PATH \ - "/usr/share/capi-media-vision/models/OD/tflite/od_tflite_model.tflite" - -#define OD_OPENCV_LABEL_CAFFE_PATH \ - "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_label_mobilenetv1ssd.txt" -#define OD_OPENCV_WEIGHT_CAFFE_PATH \ - "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.caffemodel" -#define OD_OPENCV_CONFIG_CAFFE_PATH \ - "/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.prototxt" - -/* - * Hosted models - */ -#define OD_LABEL_MOBILENET_V1_SSD_300_PATH \ - "/usr/share/capi-media-vision/models/OD/tflite/od_mobilenet_v1_ssd_postop_label.txt" -#define OD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH \ - "/usr/share/capi-media-vision/models/OD/tflite/od_mobilenet_v1_ssd_postop_300x300.tflite" -#define OD_LABEL_MOBILENET_V2_SSD_320_PATH \ - "/usr/share/capi-media-vision/models/OD/tflite/od_mobilenet_v2_ssd_label.txt" -#define OD_TFLITE_WEIGHT_MOBILENET_V2_SSD_320_PATH \ - "/usr/share/capi-media-vision/models/OD/tflite/od_mobilenet_v2_ssd_320x320.tflite" - -//Face Detection -#define FD_TFLITE_WEIGHT_PATH \ - "/usr/share/capi-media-vision/models/FD/tflite/fd_tflite_model1.tflite" - -#define FD_OPENCV_WEIGHT_CAFFE_PATH \ - "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.caffemodel" -#define FD_OPENCV_CONFIG_CAFFE_PATH \ - "/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.prototxt" - -//Facial LandmarkDetection -#define FLD_TFLITE_WEIGHT_PATH \ - "/usr/share/capi-media-vision/models/FLD/tflite/fld_tflite_model1.tflite" - -#define FLD_OPENCV_WEIGHT_CAFFE_PATH \ - "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.caffemodel" -#define FLD_OPENCV_CONFIG_CAFFE_PATH \ - "/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt" - -//Pose Detection -#define PLD_TFLITE_WEIGHT_PATH \ - "/usr/share/capi-media-vision/models/PLD/tflite/pld-tflite-001.tflite" -#define PLD_POSE_LABEL_PATH \ - "/usr/share/capi-media-vision/models/PLD/tflite/pose-label.txt" -#define PLD_MOTION_CAPTURE_FILE_PATH \ - "/usr/share/capi-media-vision/models/PLD/mocap/example.bvh" -#define PLD_MOTION_CAPTURE_MAPPING_FILE_PATH \ - "/usr/share/capi-media-vision/models/PLD/mocap/example-mocap-mapping.txt" - -/****** - * Public model: - * IC: mobilenet caffe, tf? - * OD: mobilenetv1-ssd caffe, tf? - * FD: caffe, tf - * FLD: caffe, tf - * PD: cpm model, tf and tflite. - * link : https://github.com/edvardHua/PoseEstimationForMobile/tree/master/release/cpm_model - * Ps. media vision supports cpm and hourglass models for pose estimation for now. 
- */ - -#define NANO_PER_SEC ((__clock_t) 1000000000) -#define NANO_PER_MILLI ((__clock_t) 1000000) -#define MILLI_PER_SEC ((__clock_t) 1000) - -struct timespec diff(struct timespec start, struct timespec end) -{ - struct timespec temp; - if ((end.tv_nsec - start.tv_nsec) < 0) { - temp.tv_sec = end.tv_sec - start.tv_sec - 1; - temp.tv_nsec = NANO_PER_SEC + end.tv_nsec - start.tv_nsec; - } else { - temp.tv_sec = end.tv_sec - start.tv_sec; - temp.tv_nsec = end.tv_nsec - start.tv_nsec; - } - return temp; -} - -unsigned long gettotalmillisec(const struct timespec time) -{ - return time.tv_sec * MILLI_PER_SEC + time.tv_nsec / NANO_PER_MILLI; -} - -void _object_detected_cb(mv_source_h source, const int number_of_objects, - const int *indices, const char **names, - const float *confidences, - const mv_rectangle_s *locations, void *user_data) -{ - printf("In callback: %d objects\n", number_of_objects); - - for (int n = 0; n < number_of_objects; n++) { - printf("%2d\n", indices[n]); - printf("%s\n", names[n]); - printf("%.3f\n", confidences[n]); - printf("%d,%d,%d,%d\n", locations[n].point.x, locations[n].point.y, - locations[n].width, locations[n].height); - } -} - -void _face_detected_cb(mv_source_h source, const int number_of_faces, - const float *confidences, - const mv_rectangle_s *locations, void *user_data) -{ - printf("In callback: %d faces\n", number_of_faces); - - for (int n = 0; n < number_of_faces; n++) { - printf("%.3f\n", confidences[n]); - printf("%d,%d,%d,%d\n", locations[n].point.x, locations[n].point.y, - locations[n].width, locations[n].height); - } -} - -void _facial_landmark_detected_cb(mv_source_h source, - const int number_of_landmarks, - const mv_point_s *locations, void *user_data) -{ - printf("In callback, %d landmarks\n", number_of_landmarks); - for (int n = 0; n < number_of_landmarks; n++) { - printf("%d: x[%d], y[%d]\n", n, locations[n].x, locations[n].y); - } -} - -void _pose_landmark_detected_cb(mv_source_h source, - mv_inference_pose_result_h pose, void *user_data) -{ - int cb_number_of_poses = 0; - int cb_number_of_landmarks = 0; - mv_inference_pose_get_number_of_poses(pose, &cb_number_of_poses); - mv_inference_pose_get_number_of_landmarks(pose, &cb_number_of_landmarks); - printf("%d pose with %d landmarks\n",cb_number_of_poses, cb_number_of_landmarks); - - mv_point_s point; - float score; - for (int k = 0; k < cb_number_of_poses; ++k) - for (int n = 0; n < cb_number_of_landmarks; n++) { - mv_inference_pose_get_landmark(pose, k, n, &point, &score); - printf("%d-%d: x[%d], y[%d] with %.4f\n", k, n, point.x, point.y, score); - } - - mv_pose_h poser; - float poseScore; - int ret = mv_pose_create(&poser); - if (ret != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mv_pose_h"); - return; - } - - ret = mv_pose_set_from_file(poser, - PLD_MOTION_CAPTURE_FILE_PATH, - PLD_MOTION_CAPTURE_MAPPING_FILE_PATH); - if (ret != MEDIA_VISION_ERROR_NONE) { - mv_pose_destroy(poser); - printf("Fail to mv_pose_set_from_file"); - return; - } - - ret = mv_pose_compare(poser, pose, - (MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT | MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT), - &poseScore); - if (ret != MEDIA_VISION_ERROR_NONE) { - mv_pose_destroy(poser); - printf("Fail to mv_pose_compare"); - return; - } - - printf("[Leg]:Left&Right - poseScore:[%1.4f]", poseScore); - - ret = mv_pose_destroy(poser); - if (ret != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy mv_pose_h but keep going.."); - } - - return; -} - -void _image_classified_cb(mv_source_h source, const int number_of_classes, - const 
int *indices, const char **names, - const float *confidences, void *user_data) -{ - printf("In callback: %d classes\n", number_of_classes); - - for (int n = 0; n < number_of_classes; ++n) { - printf("%2d\n", indices[n]); - printf("%s\n", names[n]); - printf("%.3f\n", confidences[n]); - } -} - -int show_menu(const char *title, const int *options, const char **names, - int cnt) -{ - printf("*********************************************\n"); - printf("* %38s *\n", title); - printf("*-------------------------------------------*\n"); - int i = 0; - for (i = 0; i < cnt; ++i) - printf("* %2i. %34s *\n", options[i], names[i]); - - printf("*********************************************\n\n"); - int selection = 0; - printf("Your choice: "); - if (scanf("%20i", &selection) == 0) { - if (scanf("%*[^\n]%*c") != 0) { - printf("ERROR: Reading the input line error.\n"); - return -1; - } - printf("ERROR: Incorrect input.\n"); - } - - return selection; -} - -int perform_configure_set_model_config_path(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - char *filePath = NULL; - while (-1 == input_string("Model configuration file path:", FILE_PATH_SIZE, - &(filePath))) { - printf("Incorrect file path! Try again.\n"); - } - - err = mv_engine_config_set_string_attribute( - engine_cfg, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, filePath); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set model configuration file path: %s\n", filePath); - } - - free(filePath); - filePath = NULL; - - return err; -} - -int perform_configure_set_model_weights_path(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - char *filePath = NULL; - while (-1 == input_string("Model weights file path:", FILE_PATH_SIZE, - &(filePath))) { - printf("Incorrect file path! Try again.\n"); - } - - err = mv_engine_config_set_string_attribute( - engine_cfg, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, filePath); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set model weights file path: %s\n", filePath); - } - - free(filePath); - filePath = NULL; - - return err; -} - -int perform_configure_set_input_data_type(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - int dataType = 0; - while (-1 == input_int("Input Tensor Data Type:", 0, 4, &dataType)) { - printf("Invalid type! Try again.\n"); - } - - err = mv_engine_config_set_int_attribute( - engine_cfg, MV_INFERENCE_INPUT_DATA_TYPE, - (mv_inference_data_type_e) dataType); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set input tensor data type: %d\n", dataType); - } - - return err; -} - -int perform_configure_set_model_userfile_path(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - char *filePath = NULL; - while (-1 == input_string("Model user file (category list) path:", - FILE_PATH_SIZE, &(filePath))) { - printf("Incorrect file path! Try again.\n"); - } - - err = mv_engine_config_set_string_attribute( - engine_cfg, MV_INFERENCE_MODEL_USER_FILE_PATH, filePath); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set model user file path: %s\n", filePath); - } - - free(filePath); - filePath = NULL; - - return err; -} - -int perform_configure_set_model_mean_value(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - double meanValue = 0.0; - while (-1 == input_double("mean value:", 0.0, 255.0, &meanValue)) { - printf("Invalid value! 
Try again.\n"); - } - - err = mv_engine_config_set_double_attribute( - engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, meanValue); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set model mean value: %f\n", meanValue); - } - - return err; -} - -int perform_configure_set_image_scale(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - double stdValue = 0.0; - while (-1 == input_double("Image scale factor:", 1.0, 255.0, &stdValue)) { - printf("Invalid value! Try again.\n"); - } - - err = mv_engine_config_set_double_attribute( - engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, stdValue); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set std value: %lf\n", stdValue); - } - - return err; -} - -int perform_configure_set_confidence_threshold(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - double threshold = 0.0; - while (-1 == input_double("threshold:", 0.0, 1.0, &threshold)) { - printf("Invalid value! Try again.\n"); - } - - err = mv_engine_config_set_double_attribute( - engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, threshold); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set image scale value: %lf\n", threshold); - } - - return err; -} - -int perform_configure_set_backend(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - int backendType = 0; - while (-1 == input_int("Backend Type:", 1, 3, &backendType)) { - printf("Invalid type! Try again.\n"); - } - - err = mv_engine_config_set_int_attribute( - engine_cfg, MV_INFERENCE_BACKEND_TYPE, - (mv_inference_backend_type_e) backendType); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set backend type: %d\n", backendType); - } - - return err; -} - -int perform_configure_set_target(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - int targetType = 0; - while (-1 == input_int("Backend Type:", 1, 2, &targetType)) { - printf("Invalid type! Try again.\n"); - } - - err = mv_engine_config_set_int_attribute( - engine_cfg, MV_INFERENCE_TARGET_TYPE, - (mv_inference_target_type_e) targetType); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set target type: %d\n", targetType); - } - - return err; -} - -int perform_configure_set_tensor_width(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - int tensorW = 0; - while (-1 == input_int("Tensor Width:", INT_MIN, INT_MAX, &tensorW)) { - printf("Invalid value! Try again.\n"); - } - - err = mv_engine_config_set_int_attribute( - engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, tensorW); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set tensor width: %d\n", tensorW); - } - - return err; -} - -int perform_configure_set_tensor_height(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - int tensorH = 0; - while (-1 == input_int("Tensor Height:", INT_MIN, INT_MAX, &tensorH)) { - printf("Invalid value! Try again.\n"); - } - - err = mv_engine_config_set_int_attribute( - engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, tensorH); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set tensor height: %d\n", tensorH); - } - - return err; -} - -int perform_configure_set_tensor_channels(mv_engine_config_h engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - int tensorC = 0; - while (-1 == input_int("Tensor Channels:", INT_MIN, INT_MAX, &tensorC)) { - printf("Invalid value! 
Try again.\n"); - } - - err = mv_engine_config_set_int_attribute( - engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, tensorC); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to set tensor channels: %d\n", tensorC); - } - - return err; -} - -int perform_configuration(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }; - const char *names[] = { "Set Model Configuration", - "Set Model Weights", - "Set Model Data Type", - "Set Model UserFile", - "Set Model MeanFile", - "Set Image Scale", - "Set Confidence Threshold", - "Set Backend", - "Set Target", - "Set InputTensor Width", - "Set InputTensor Height", - "Set InputTensor Channels", - "Back" }; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - while (sel_opt == 0) { - sel_opt = show_menu("Select Actions: ", options, names, - ARRAY_SIZE(options)); - switch (sel_opt) { - case 1: - err = perform_configure_set_model_config_path(handle); - sel_opt = 0; - break; - case 2: - err = perform_configure_set_model_weights_path(handle); - sel_opt = 0; - break; - case 3: - err = perform_configure_set_input_data_type(handle); - sel_opt = 0; - break; - case 4: - err = perform_configure_set_model_userfile_path(handle); - sel_opt = 0; - break; - case 5: - err = perform_configure_set_model_mean_value(handle); - sel_opt = 0; - break; - case 6: - err = perform_configure_set_image_scale(handle); - sel_opt = 0; - break; - case 7: - err = perform_configure_set_confidence_threshold(handle); - sel_opt = 0; - break; - case 8: - err = perform_configure_set_backend(handle); - sel_opt = 0; - break; - case 9: - err = perform_configure_set_target(handle); - sel_opt = 0; - break; - case 10: - err = perform_configure_set_tensor_width(handle); - sel_opt = 0; - break; - case 11: - err = perform_configure_set_tensor_height(handle); - sel_opt = 0; - break; - case 12: - err = perform_configure_set_tensor_channels(handle); - sel_opt = 0; - break; - case 13: - err = MEDIA_VISION_ERROR_NONE; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } - - *engine_cfg = handle; - return err; -} - -int perform_tflite_mobilenetv1_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "input_2"; - const char *outputNodeName[] = { "dense_3/Softmax" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, IC_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, IC_LABEL_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.0); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 
127.0); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.6); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 224); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 224); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_armnn_mobilenetv1_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "input_2"; - const char *outputNodeName[] = { "dense_3/Softmax" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, IC_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, IC_LABEL_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.0); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.0); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.6); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_ARMNN); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 224); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 224); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_one_mobilenetv1_quant_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "input"; - const char *outputNodeName[] = { "MobilenetV1/Predictions/Reshape_1" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, IC_Q_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_UINT8); - - 
mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, IC_Q_LABEL_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 0.0); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.6); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_ONE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 224); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 224); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_vivante_inceptionv3_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "input"; - const char *outputNodeName[] = { "InceptionV3/Predictions/Peshape_1" }; - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_VIVANTE_WEIGHT_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, - IC_VIVANTE_CONFIG_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_UINT8); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, IC_VIVANTE_LABEL_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 0.0); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.6); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_MLAPI); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_DEVICE_TYPE, - MV_INFERENCE_TARGET_DEVICE_CUSTOM); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 299); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 299); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_opencv_caffe_squeezenet_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) 
{ - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "data"; - const char *outputNodeName[] = { "prob" }; - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_OPENCV_WEIGHT_CAFFE_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, - IC_OPENCV_CONFIG_CAFFE_PATH); - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_USER_FILE_PATH, - IC_OPENCV_LABEL_CAFFE_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 0.0); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_OPENCV); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 227); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 227); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - - -int perform_hosted_tflite_mobilenetv1_224_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_TFLITE_WEIGHT_MOBILENET_V1_224_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - IC_LABEL_MOBILENET_V1_224_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_mobilenetv2_224_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_TFLITE_WEIGHT_MOBILENET_V2_224_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - IC_LABEL_MOBILENET_V2_224_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, 
MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_densenet_224_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_TFLITE_WEIGHT_DENSENET_224_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - IC_LABEL_DENSENET_224_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_inception_resnet_299_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_TFLITE_WEIGHT_INCEPTION_RESENET_299_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - IC_LABEL_INCEPTION_RESENET_299_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_inception_v3_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_TFLITE_WEIGHT_INCEPTION_V3_299_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - IC_LABEL_INCEPTION_V3_299_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_inception_v4_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - 
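/* Defensive cleanup: mv_create_engine_config() failed above, so handle is normally still NULL here; the destroy call above only matters if creation failed after partially allocating the handle. */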
} - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_TFLITE_WEIGHT_INCEPTION_V4_299_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - IC_LABEL_INCEPTION_V4_299_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_nasnet_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_TFLITE_WEIGHT_NASNET_224_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - IC_LABEL_NASNET_224_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_mnasnet_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_TFLITE_WEIGHT_MNASNET_224_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - IC_LABEL_MNASNET_224_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_resnet_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_TFLITE_WEIGHT_RESNET_V2_299_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - IC_LABEL_RESNET_V2_299_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_squeezenet_config(mv_engine_config_h *engine_cfg) -{ 
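/* Same pattern as the other hosted-model configs: only the weight and label file paths plus the backend and target type are set explicitly; tensor shape, node names, mean/std and the confidence threshold are presumably left to the framework's default engine configuration values. */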
- int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - IC_TFLITE_WEIGHT_SQUEEZENET_224_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - IC_LABEL_SQUEEZENET_224_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_image_classification() -{ - int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19 }; - const char *names[] = { "Configuration", - "TFLite(cpu + Mobilenet)", - "OpenCV(cpu + Squeezenet)", - "ARMNN(cpu + Mobilenet)", - "ONE(cpu + Mobilenet_Q)", - "Vivante(NPU + Inceptionv3)", - "Hosted: TFLite(cpu + Mobilenet V1)", // 7 - "Hosted: TFLite(cpu + Mobilenet V2)", - "Hosted: TFLite(cpu + Densenet)", - "Hosted: TFLite(cpu + Inception Resnet)", - "Hosted: TFLite(cpu + Inception V3)", - "Hosted: TFLite(cpu + Inception V4)", - "Hosted: TFLite(cpu + Nasnet)", - "Hosted: TFLite(cpu + Mnasnet)", - "Hosted: TFLite(cpu + Resnet)", - "Hosted: TFLite(cpu + Squeezenet)", //16 - "Prepare", - "Run", - "Back" }; - - mv_engine_config_h engine_cfg = NULL; - mv_inference_h infer = NULL; - mv_source_h mvSource = NULL; - - while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); - switch (sel_opt) { - case 1: { - //perform configuration - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_configuration(&engine_cfg); - } break; - case 2: { - // perform TFLite - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_tflite_mobilenetv1_config(&engine_cfg); - } break; - - case 3: { - // perform OpenCV - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_opencv_caffe_squeezenet_config(&engine_cfg); - } break; - case 4: { - // perform ARMNN - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_armnn_mobilenetv1_config(&engine_cfg); - } break; - case 5: { - // perform ONE(On-device Neural Engine) - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_one_mobilenetv1_quant_config(&engine_cfg); - } break; - case 6: { - // perform Vivante - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - 
printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_vivante_inceptionv3_config(&engine_cfg); - } break; - case 7: { - // perform hosted mobilenetv1 - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_mobilenetv1_224_config(&engine_cfg); - } break; - case 8: { - // perform hosted mobilenetv2 - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_mobilenetv2_224_config(&engine_cfg); - } break; - case 9: { - // perform hosted densenet - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_densenet_224_config(&engine_cfg); - } break; - case 10: { - // perform hosted inception resnet - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_inception_resnet_299_config(&engine_cfg); - } break; - case 11: { - // perform hosted inception v3 - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_inception_v3_config(&engine_cfg); - } break; - case 12: { - // perform hosted inception v4 - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_inception_v4_config(&engine_cfg); - } break; - case 13: { - // perform hosted nasnet - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_nasnet_config(&engine_cfg); - } break; - case 14: { - // perform hosted mnasnet - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_mnasnet_config(&engine_cfg); - } break; - case 15: { - // perform hosted resnet - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_resnet_config(&engine_cfg); - } break; - case 16: { - // perform hosted squeezenet - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_squeezenet_config(&engine_cfg); - } break; - case 17: { - //create - configure - prepare - if (infer) { - int err2 = mv_inference_destroy(infer); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err2); - infer = NULL; - } - - // inference - // create handle - err = mv_inference_create(&infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create inference handle [err:%i]\n", err); - break; - } - - // configure - err = mv_inference_configure(infer, engine_cfg); - if (err != 
MEDIA_VISION_ERROR_NONE) { - printf("Fail to configure inference handle\n"); - break; - } - - // prepare - err = mv_inference_prepare(infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to prepare inference handle."); - break; - } - } break; - case 18: { - if (mvSource) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource.\n"); - mvSource = NULL; - } - - char *in_file_name = NULL; - /* Load media source */ - while (input_string("Input file name to be inferred:", 1024, - &(in_file_name)) == -1) - printf("Incorrect input! Try again.\n"); - - err = mv_create_source(&mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mvSource.\n"); - free(in_file_name); - break; - } - - err = load_mv_source_from_file(in_file_name, mvSource); - if (MEDIA_VISION_ERROR_NONE != err) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource. error code:%i\n", err2); - mvSource = NULL; - free(in_file_name); - break; - } - free(in_file_name); - - struct timespec s_tspec; - struct timespec e_tspec; - - clock_gettime(CLOCK_MONOTONIC, &s_tspec); - - // Run - err = mv_inference_image_classify(mvSource, infer, NULL, - _image_classified_cb, NULL); - - clock_gettime(CLOCK_MONOTONIC, &e_tspec); - - struct timespec diffspec = diff(s_tspec, e_tspec); - unsigned long timeDiff = gettotalmillisec(diffspec); - printf("elapsed time : %lu(ms)\n", timeDiff); - - } break; - case 19: { - //perform destroy - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - } break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - continue; - } - - int do_another = 0; - if (err != MEDIA_VISION_ERROR_NONE) { - printf("ERROR: Action is finished with error code:%i\n", err); - } - - sel_opt = 0; - const int options_last[2] = { 1, 2 }; - const char *names_last[2] = { "Yes", "No" }; - - while (sel_opt == 0) { - sel_opt = - show_menu("Run Image Classification again?: ", options_last, - names_last, ARRAY_SIZE(options_last)); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } - - sel_opt = (do_another == 1) ? 
0 : 1; - } - - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - - return MEDIA_VISION_ERROR_NONE; -} - -/* - * - * Object Detection - * - */ -int perform_tflite_mobilenetv1ssd_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "normalized_input_image_tensor"; - const char *outputNodeName[] = { "TFLite_Detection_PostProcess", - "TFLite_Detection_PostProcess:1", - "TFLite_Detection_PostProcess:2", - "TFLite_Detection_PostProcess:3" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, OD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, OD_LABEL_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4); - - *engine_cfg = handle; - return err; -} - -int perform_opencv_mobilenetv1ssd_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "data"; - const char *outputNodeName[1] = { "detection_out" }; - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - OD_OPENCV_WEIGHT_CAFFE_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, - OD_OPENCV_CONFIG_CAFFE_PATH); - - mv_engine_config_set_int_attribute(handle, 
MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_USER_FILE_PATH, - OD_OPENCV_LABEL_CAFFE_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_OPENCV); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_armnn_mobilenetv1ssd_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "normalized_input_image_tensor"; - const char *outputNodeName[] = { "TFLite_Detection_PostProcess", - "TFLite_Detection_PostProcess:1", - "TFLite_Detection_PostProcess:2", - "TFLite_Detection_PostProcess:3" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, OD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, OD_LABEL_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_ARMNN); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_mobilenetv1ssd_300_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 
= mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - OD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - OD_LABEL_MOBILENET_V1_SSD_300_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_hosted_tflite_mobilenetv2ssd_320_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - OD_TFLITE_WEIGHT_MOBILENET_V2_SSD_320_PATH); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, - OD_LABEL_MOBILENET_V2_SSD_320_PATH); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute( - handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - *engine_cfg = handle; - return err; -} - -int perform_object_detection() -{ - int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; - const char *names[] = { "Configuration", - "TFLITE(CPU) + MobileNetV1+SSD", - "OPENCV(CPU) + MobileNetV1+SSD", - "ARMNN(CPU) + MobileNetV1+SSD", - "Hosted: TFLite(cpu + MobilenetV1+SSD)", - "Hosted: TFLite(cpu + MobilenetV2+SSD)", - "Prepare", - "Run", - "Back" }; - - mv_engine_config_h engine_cfg = NULL; - mv_inference_h infer = NULL; - mv_source_h mvSource = NULL; - - while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); - switch (sel_opt) { - case 1: { - //perform configuration - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_configuration(&engine_cfg); - } break; - case 2: { - //perform TFlite MobileSSD config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_tflite_mobilenetv1ssd_config(&engine_cfg); - } break; - case 3: { - //perform OpenCV MobileSSD config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_opencv_mobilenetv1ssd_config(&engine_cfg); - } break; - case 4: { - //perform ARMNN MobileSSD config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_armnn_mobilenetv1ssd_config(&engine_cfg); - } break; - case 5: { - //perform hosted mobilenet 
v1 + ssd - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_mobilenetv1ssd_300_config(&engine_cfg); - } break; - case 6: { - //perform hosted mobilenet v2 + ssd - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_hosted_tflite_mobilenetv2ssd_320_config(&engine_cfg); - } break; - case 7: { - // create - configure - prepare - if (infer) { - int err2 = mv_inference_destroy(infer); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err2); - infer = NULL; - } - - // inference - // create handle - err = mv_inference_create(&infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create inference handle [err:%i]\n", err); - break; - } - - //configure - err = mv_inference_configure(infer, engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to configure inference handle [err:%i]\n", err); - break; - } - - //prepare - err = mv_inference_prepare(infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to prepare inference handle"); - break; - } - } break; - case 8: { - if (mvSource) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource\n"); - mvSource = NULL; - } - - char *in_file_name = NULL; - /* Load media source */ - while (input_string("Input file name to be inferred:", 1024, - &(in_file_name)) == -1) - printf("Incorrect input! Try again.\n"); - - err = mv_create_source(&mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mvSource.\n"); - free(in_file_name); - break; - } - - err = load_mv_source_from_file(in_file_name, mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource.\n"); - mvSource = NULL; - free(in_file_name); - break; - } - free(in_file_name); - - struct timespec s_tspec; - struct timespec e_tspec; - - clock_gettime(CLOCK_MONOTONIC, &s_tspec); - - // Object Detect - err = mv_inference_object_detect(mvSource, infer, - _object_detected_cb, NULL); - - clock_gettime(CLOCK_MONOTONIC, &e_tspec); - - struct timespec diffspec = diff(s_tspec, e_tspec); - unsigned long timeDiff = gettotalmillisec(diffspec); - printf("elapsed time : %lu(ms)\n", timeDiff); - - } break; - case 9: { - //perform destroy - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource.\n"); - mvSource = NULL; - } - } break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - continue; - } - - int do_another = 0; - if (err != MEDIA_VISION_ERROR_NONE) { - printf("ERROR: Action is finished with error code:%i\n", err); - } - - sel_opt = 0; - const int options_last[2] = { 1, 2 }; - const char *names_last[2] = { "Yes", "No" }; - - while (sel_opt == 0) { - sel_opt = show_menu("Run Object Detection again?:", options_last, - names_last, 
ARRAY_SIZE(options_last)); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } - - sel_opt = (do_another == 1) ? 0 : 1; - } - - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource\n"); - mvSource = NULL; - } - - return MEDIA_VISION_ERROR_NONE; -} - -int perform_tflite_mobilenetv1ssd_face(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "normalized_input_image_tensor"; - const char *outputNodeName[] = { "TFLite_Detection_PostProcess", - "TFLite_Detection_PostProcess:1", - "TFLite_Detection_PostProcess:2", - "TFLite_Detection_PostProcess:3" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, FD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4); - - *engine_cfg = handle; - return err; -} - -int perform_opencv_resnet10ssd_face(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "data"; - const char *outputNodeName[] = { "detection_out" }; - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FD_OPENCV_WEIGHT_CAFFE_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - 
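/* The OpenCV backend loads this Caffe face detector from two files: the weights (FD_OPENCV_WEIGHT_CAFFE_PATH, set above) and the network topology set just below via MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH (typically a .prototxt paired with a .caffemodel). */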
mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, - FD_OPENCV_CONFIG_CAFFE_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 135.7); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_OPENCV); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_armnn_mobilenetv1ssd_face(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "normalized_input_image_tensor"; - const char *outputNodeName[] = { "TFLite_Detection_PostProcess", - "TFLite_Detection_PostProcess:1", - "TFLite_Detection_PostProcess:2", - "TFLite_Detection_PostProcess:3" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, FD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_ARMNN); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 300); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 300); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4); - - *engine_cfg = handle; - return err; -} - -int perform_face_detection() -{ - int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6, 7 }; - const char *names[] = { "Configuration", - "TFLite(CPU) + MobileNetV1 + SSD", - "OPENCV(CPU) + Resnet10 + SSD", - "ARMNN(CPU) + MobileNetV1 + SSD", - "Prepare", - "Run", - "Back" }; - - mv_engine_config_h engine_cfg = NULL; - mv_inference_h infer = NULL; - mv_source_h mvSource = NULL; - - while (sel_opt == 0) { - sel_opt = show_menu("Select 
Action:", options, names, - ARRAY_SIZE(options)); - switch (sel_opt) { - case 1: { - //perform configuration - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_configuration(&engine_cfg); - } break; - case 2: { - //perform TF Mobilenetssd config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_tflite_mobilenetv1ssd_face(&engine_cfg); - } break; - case 3: { - //perform TF Lite Mobilenetssd config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_opencv_resnet10ssd_face(&engine_cfg); - } break; - case 4: { - //perform TF Lite Mobilenetssd config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - } - - err = perform_armnn_mobilenetv1ssd_face(&engine_cfg); - } break; - case 5: { - // create - configure - prepare - if (infer) { - int err2 = mv_inference_destroy(infer); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err2); - infer = NULL; - } - - // inference - // create handle - err = mv_inference_create(&infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create inference handle [err:%i]\n", err); - break; - } - - //configure - err = mv_inference_configure(infer, engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to configure inference handle [err:%i]\n", err); - break; - } - - //prepare - err = mv_inference_prepare(infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to prepare inference handle"); - break; - } - } break; - case 6: { - if (mvSource) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource\n"); - mvSource = NULL; - } - - char *in_file_name = NULL; - /* Load media source */ - while (input_string("Input file name to be inferred:", 1024, - &(in_file_name)) == -1) - printf("Incorrect input! Try again.\n"); - - err = mv_create_source(&mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mvSource.\n"); - free(in_file_name); - break; - } - - err = load_mv_source_from_file(in_file_name, mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource. 
error code:%i\n", err2); - mvSource = NULL; - free(in_file_name); - break; - } - free(in_file_name); - - struct timespec s_tspec; - struct timespec e_tspec; - - clock_gettime(CLOCK_MONOTONIC, &s_tspec); - - // Face Detect - err = mv_inference_face_detect(mvSource, infer, _face_detected_cb, - NULL); - - clock_gettime(CLOCK_MONOTONIC, &e_tspec); - - struct timespec diffspec = diff(s_tspec, e_tspec); - unsigned long timeDiff = gettotalmillisec(diffspec); - printf("elapsed time : %lu(ms)\n", timeDiff); - } break; - case 7: { - //perform destroy - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - } break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - continue; - } - - int do_another = 0; - if (err != MEDIA_VISION_ERROR_NONE) { - printf("ERROR: Action is finished with error code:%i\n", err); - } - - sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - - while (sel_opt == 0) { - sel_opt = show_menu("Run Face Detection again?:", options_last, - names_last, ARRAY_SIZE(options_last)); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } - - sel_opt = (do_another == 1) ? 0 : 1; - } - - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - - return MEDIA_VISION_ERROR_NONE; -} - -int perform_tflite_TweakCNN(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "INPUT_TENSOR_NAME"; - const char *outputNodeName[] = { "OUTPUT_TENSOR_NAME" }; - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FLD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 0.0); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, 
MV_INFERENCE_INPUT_TENSOR_WIDTH, - 128); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 128); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_opencv_cnncascade(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "data"; - const char *outputNodeName[] = { "Sigmoid_fc2" }; - - mv_engine_config_set_string_attribute(handle, - MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, - FLD_OPENCV_WEIGHT_CAFFE_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, - FLD_OPENCV_CONFIG_CAFFE_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 127.5); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 127.5); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_OPENCV); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 128); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 128); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - -int perform_facial_landmark_detection() -{ - int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5, 6 }; - const char *names[] = { "Configuration", - "Tflite(CPU) + TweakCNN", - "OPENCV(CPU) + TweakCNN", - "Prepare", - "Run", - "Back" }; - - mv_engine_config_h engine_cfg = NULL; - mv_inference_h infer = NULL; - mv_source_h mvSource = NULL; - - while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); - switch (sel_opt) { - case 1: { - //perform configuration - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_configuration(&engine_cfg); - } break; - case 2: { - //perform SRID TweakCNN config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_tflite_TweakCNN(&engine_cfg); - } break; - case 3: { - //perform CNN cascade - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to 
destroy engine_cfg [err:%i]\n", err2); - } - err = perform_opencv_cnncascade(&engine_cfg); - } break; - case 4: { - // create - configure - prepare - if (infer) { - int err2 = mv_inference_destroy(infer); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err2); - infer = NULL; - } - - // inference - // create handle - err = mv_inference_create(&infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create inference handle [err:%i]\n", err); - break; - } - - //configure - err = mv_inference_configure(infer, engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to configure inference handle [err:%i]\n", err); - break; - } - - //prepare - err = mv_inference_prepare(infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to prepare inference handle\n"); - break; - } - } break; - case 5: { - if (mvSource) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource\n"); - mvSource = NULL; - } - - char *in_file_name = NULL; - /* Load media source */ - while (input_string("Input file name to be inferred:", 1024, - &(in_file_name)) == -1) - printf("Incorrect input! Try again.\n"); - - err = mv_create_source(&mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mvSource.\n"); - free(in_file_name); - break; - } - - err = load_mv_source_from_file(in_file_name, mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource. error code:%i\n", err2); - mvSource = NULL; - free(in_file_name); - break; - } - free(in_file_name); - - struct timespec s_tspec; - struct timespec e_tspec; - - clock_gettime(CLOCK_MONOTONIC, &s_tspec); - - // Facial Landmark Detect - err = mv_inference_facial_landmark_detect( - mvSource, infer, NULL, _facial_landmark_detected_cb, NULL); - - clock_gettime(CLOCK_MONOTONIC, &e_tspec); - - struct timespec diffspec = diff(s_tspec, e_tspec); - unsigned long timeDiff = gettotalmillisec(diffspec); - printf("elapsed time : %lu(ms)\n", timeDiff); - } break; - case 6: { - //perform destroy - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - } break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - continue; - } - - int do_another = 0; - if (err != MEDIA_VISION_ERROR_NONE) { - printf("ERROR: Action is finished with error code:%i\n", err); - } - - sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - - while (sel_opt == 0) { - sel_opt = show_menu( - "Run Facial Landmark Detection again?:", options_last, - names_last, ARRAY_SIZE(options_last)); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } - - sel_opt = (do_another == 1) ? 
0 : 1; - } - - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - - return MEDIA_VISION_ERROR_NONE; -} - -int perform_armnn_cpm_config(mv_engine_config_h *engine_cfg) -{ - int err = MEDIA_VISION_ERROR_NONE; - - mv_engine_config_h handle = NULL; - err = mv_create_engine_config(&handle); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create engine configuration handle.\n"); - if (handle) { - int err2 = mv_destroy_engine_config(handle); - if (err2 != MEDIA_VISION_ERROR_NONE) { - printf("Fail to destroy engine configuration.\n"); - } - } - return err; - } - - const char *inputNodeName = "image"; - const char *outputNodeName[] = { "Convolutional_Pose_Machine/stage_5_out" }; - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, PLD_TFLITE_WEIGHT_PATH); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, - MV_INFERENCE_DATA_FLOAT32); - - mv_engine_config_set_string_attribute( - handle, MV_INFERENCE_MODEL_USER_FILE_PATH, PLD_POSE_LABEL_PATH); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, - 0.0); - - mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, - 1.0); - - mv_engine_config_set_double_attribute( - handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, - MV_INFERENCE_BACKEND_TFLITE); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, - MV_INFERENCE_TARGET_CPU); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, - 192); - - mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, - 192); - - mv_engine_config_set_int_attribute(handle, - MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); - - mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, - inputNodeName); - - mv_engine_config_set_array_string_attribute( - handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); - - *engine_cfg = handle; - return err; -} - - -int perform_pose_landmark_detection() -{ - int err = MEDIA_VISION_ERROR_NONE; - - int sel_opt = 0; - const int options[] = { 1, 2, 3, 4, 5 }; - const char *names[] = { "Configuration", - "TFLITE(CPU) + CPM", - "Prepare", - "Run", - "Back" }; - - mv_engine_config_h engine_cfg = NULL; - mv_inference_h infer = NULL; - mv_source_h mvSource = NULL; - - while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); - switch (sel_opt) { - case 1: { - //perform configuration - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - - err = perform_configuration(&engine_cfg); - } break; - case 2: { - //perform SRID TweakCNN config - if (engine_cfg) { - int err2 = mv_destroy_engine_config(engine_cfg); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err2); - engine_cfg = NULL; - } - err = perform_armnn_cpm_config(&engine_cfg); - } 
break; - case 3: { - // create - configure - prepare - if (infer) { - int err2 = mv_inference_destroy(infer); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err2); - infer = NULL; - } - - // inference - // create handle - err = mv_inference_create(&infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create inference handle [err:%i]\n", err); - break; - } - - //configure - err = mv_inference_configure(infer, engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to configure inference handle [err:%i]\n", err); - break; - } - - //prepare - err = mv_inference_prepare(infer); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to prepare inference handle"); - break; - } - } break; - case 4: { - if (mvSource) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource\n"); - mvSource = NULL; - } - - char *in_file_name = NULL; - /* Load media source */ - while (input_string("Input file name to be inferred:", 1024, - &(in_file_name)) == -1) - printf("Incorrect input! Try again.\n"); - - err = mv_create_source(&mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to create mvSource.\n"); - free(in_file_name); - break; - } - - err = load_mv_source_from_file(in_file_name, mvSource); - if (err != MEDIA_VISION_ERROR_NONE) { - int err2 = mv_destroy_source(mvSource); - if (err2 != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource err: %d.\n", err2); - mvSource = NULL; - free(in_file_name); - break; - } - free(in_file_name); - - struct timespec s_tspec; - struct timespec e_tspec; - - clock_gettime(CLOCK_MONOTONIC, &s_tspec); - - // Object Detect - err = mv_inference_pose_landmark_detect( - mvSource, infer, NULL, _pose_landmark_detected_cb, NULL); - - clock_gettime(CLOCK_MONOTONIC, &e_tspec); - - struct timespec diffspec = diff(s_tspec, e_tspec); - unsigned long timeDiff = gettotalmillisec(diffspec); - printf("elapsed time : %lu(ms)\n", timeDiff); - } break; - case 5: { - //perform destroy - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - } break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - continue; - } - - int do_another = 0; - if (err != MEDIA_VISION_ERROR_NONE) { - printf("ERROR: Action is finished with error code: %i\n", err); - } - - sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - - while (sel_opt == 0) { - sel_opt = show_menu( - "Run Pose Landmark Detection again?:", options_last, - names_last, ARRAY_SIZE(options_last)); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } - - sel_opt = (do_another == 1) ? 
0 : 1; - } - - if (engine_cfg) { - err = mv_destroy_engine_config(engine_cfg); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy engine_cfg [err:%i]\n", err); - engine_cfg = NULL; - } - - if (infer) { - err = mv_inference_destroy(infer); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy inference handle [err:%i]\n", err); - infer = NULL; - } - - if (mvSource) { - err = mv_destroy_source(mvSource); - if (err != MEDIA_VISION_ERROR_NONE) - printf("Fail to destroy mvSource [err:%i]\n", err); - mvSource = NULL; - } - - return MEDIA_VISION_ERROR_NONE; -} - -int main() -{ - int sel_opt = 0; - - const int options[] = { 1, 2, 3, 4, 5, 6 }; - const char *names[] = { "Image Classification", "Object Detection", - "Face Detection", "Facial Landmark Detection", - "Pose Landmark Detection", "Exit" }; - - int err = MEDIA_VISION_ERROR_NONE; - while (sel_opt == 0) { - sel_opt = show_menu("Select Action:", options, names, - ARRAY_SIZE(options)); - switch (sel_opt) { - case 1: { - err = perform_image_classification(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform image classification. ERROR[0x%x]\n", err); - } - } break; - case 2: { - err = perform_object_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform object detection. ERROR[0x%x]\n", err); - } - } break; - case 3: { - err = perform_face_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform face detection. ERROR[0x%x]\n", err); - } - } break; - case 4: { - err = perform_facial_landmark_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform facial landmark detection. ERROR[0x%x]\n", err); - } - } break; - case 5: { - err = perform_pose_landmark_detection(); - if (err != MEDIA_VISION_ERROR_NONE) { - printf("Fail to perform pose landmark detection"); - } - } break; - case 6: { - printf("Exit"); - } break; - default: - printf("Invalid option"); - sel_opt = 0; - continue; - } - - int do_another = 0; - - sel_opt = 0; - const int options_last[] = { 1, 2 }; - const char *names_last[] = { "Yes", "No" }; - - while (sel_opt == 0) { - sel_opt = - show_menu("Another action?: ", options_last, names_last, 2); - switch (sel_opt) { - case 1: - do_another = 1; - break; - case 2: - do_another = 0; - break; - default: - printf("Invalid option.\n"); - sel_opt = 0; - } - } - - sel_opt = (do_another == 1) ? 
0 : 1;
-	}
-
-	return 0;
-}
diff --git a/test/testsuites/machine_learning/CMakeLists.txt b/test/testsuites/machine_learning/CMakeLists.txt
new file mode 100644
index 00000000..57785bec
--- /dev/null
+++ b/test/testsuites/machine_learning/CMakeLists.txt
@@ -0,0 +1,4 @@
+project(machine_learning)
+cmake_minimum_required(VERSION 2.6)
+
+add_subdirectory(${PROJECT_SOURCE_DIR}/inference)
diff --git a/test/testsuites/machine_learning/inference/CMakeLists.txt b/test/testsuites/machine_learning/inference/CMakeLists.txt
new file mode 100644
index 00000000..c72e260f
--- /dev/null
+++ b/test/testsuites/machine_learning/inference/CMakeLists.txt
@@ -0,0 +1,31 @@
+project(mv_infer_test_suite)
+cmake_minimum_required(VERSION 2.6)
+
+set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DEBUG _DEBUG)
+
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR})
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${LIB_INSTALL_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+
+include_directories(${PROJECT_SOURCE_DIR})
+include_directories(${MV_CAPI_MEDIA_VISION_INC_DIR})
+include_directories(${INC_IMAGE_HELPER})
+include_directories(${INC_VIDEO_HELPER})
+include_directories(${INC_TS_COMMON})
+
+MESSAGE("TESTSUITE: ${MV_CAPI_MEDIA_VISION_INC_DIR}")
+
+file(GLOB MV_INFER_TEST_SUITE_INC_LIST "${PROJECT_SOURCE_DIR}/*.h")
+file(GLOB MV_INFER_TEST_SUITE_SRC_LIST "${PROJECT_SOURCE_DIR}/*.c")
+
+add_executable(${PROJECT_NAME}
+               ${MV_INFER_TEST_SUITE_INC_LIST}
+               ${MV_INFER_TEST_SUITE_SRC_LIST}
+               ${MV_CAPI_MEDIA_VISION_INC_LIST})
+
+target_link_libraries(${PROJECT_NAME} mv_inference
+                      mv_image_helper
+                      mv_video_helper
+                      mv_testsuite_common)
+
+install(TARGETS ${PROJECT_NAME} DESTINATION ${testbin_dir})
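The file added below is an interactive test driver: every menu scenario ultimately runs the same create/configure/prepare/run sequence against the inference C API. As a roadmap before the roughly 3300 lines that follow, here is a minimal sketch of that flow. The paths are placeholders, load_mv_source_from_file() and _image_classified_cb() are the suite's own helpers defined further down, and error handling is trimmed to the essentials:

    #include <mv_common.h>
    #include <mv_inference.h>

    /* Minimal classification round trip, assuming a TFLite model on CPU. */
    static int classify_once(const char *model, const char *labels,
                             const char *image)
    {
        mv_engine_config_h cfg = NULL;
        mv_inference_h infer = NULL;
        mv_source_h src = NULL;

        int err = mv_create_engine_config(&cfg);
        if (err != MEDIA_VISION_ERROR_NONE)
            return err;

        /* Point the engine at the model and pick backend/target. */
        mv_engine_config_set_string_attribute(cfg, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, model);
        mv_engine_config_set_string_attribute(cfg, MV_INFERENCE_MODEL_USER_FILE_PATH, labels);
        mv_engine_config_set_int_attribute(cfg, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE);
        mv_engine_config_set_int_attribute(cfg, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);

        err = mv_inference_create(&infer);
        if (err == MEDIA_VISION_ERROR_NONE)
            err = mv_inference_configure(infer, cfg);
        if (err == MEDIA_VISION_ERROR_NONE)
            err = mv_inference_prepare(infer);
        if (err == MEDIA_VISION_ERROR_NONE)
            err = mv_create_source(&src);
        if (err == MEDIA_VISION_ERROR_NONE)
            err = load_mv_source_from_file(image, src); /* suite helper */
        if (err == MEDIA_VISION_ERROR_NONE)
            err = mv_inference_image_classify(src, infer, NULL,
                                              _image_classified_cb, NULL);

        if (src)
            mv_destroy_source(src);
        if (infer)
            mv_inference_destroy(infer);
        if (cfg)
            mv_destroy_engine_config(cfg);
        return err;
    }

The detection scenarios differ only in the final call (mv_inference_object_detect, mv_inference_face_detect, and so on) and the callback signature.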
diff --git a/test/testsuites/machine_learning/inference/inference_test_suite.c b/test/testsuites/machine_learning/inference/inference_test_suite.c
new file mode 100644
index 00000000..bfeabd4e
--- /dev/null
+++ b/test/testsuites/machine_learning/inference/inference_test_suite.c
@@ -0,0 +1,3310 @@
+/**
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* NOTE: the header names were lost in transit; this set is reconstructed
+ * from what the code below actually uses and may differ from upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include <unistd.h>
+
+#include <mv_common.h>
+#include <mv_inference.h>
+#include <image_helper.h>
+#include <mv_video_helper.h>
+#include <mv_testsuite_common.h>
+
+#define ARRAY_SIZE(x) (sizeof((x)) / sizeof((x)[0]))
+#define FILE_PATH_SIZE 1024
+
+//Image Classification
+#define IC_LABEL_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_label.txt"
+#define IC_TFLITE_WEIGHT_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_tflite_model.tflite"
+#define IC_Q_LABEL_PATH \
+	"/usr/share/capi-media-vision/models/IC_Q/tflite/ic_label.txt"
+#define IC_Q_TFLITE_WEIGHT_PATH \
+	"/usr/share/capi-media-vision/models/IC_Q/tflite/ic_tflite_model.tflite"
+
+/*
+ * Hosted models
+ */
+#define IC_LABEL_MOBILENET_V1_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_mobilenet_v1_label.txt"
+#define IC_TFLITE_WEIGHT_MOBILENET_V1_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_mobilenet_v1_224x224.tflite"
+#define IC_LABEL_MOBILENET_V2_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_mobilenet_v2_label.txt"
+#define IC_TFLITE_WEIGHT_MOBILENET_V2_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_mobilenet_v2_224x224.tflite"
+#define IC_LABEL_DENSENET_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_densenet_label.txt"
+#define IC_TFLITE_WEIGHT_DENSENET_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_densenet_224x224.tflite"
+#define IC_LABEL_INCEPTION_RESENET_299_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_densenet_label.txt"
+#define IC_TFLITE_WEIGHT_INCEPTION_RESENET_299_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_inception_resnet_v2_299x299.tflite"
+#define IC_LABEL_INCEPTION_V3_299_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_inception_v3_label.txt"
+#define IC_TFLITE_WEIGHT_INCEPTION_V3_299_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_inception_v3_299x299.tflite"
+#define IC_LABEL_INCEPTION_V4_299_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_inception_v4_label.txt"
+#define IC_TFLITE_WEIGHT_INCEPTION_V4_299_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_inception_v4_299x299.tflite"
+#define IC_LABEL_NASNET_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_nasnet_label.txt"
+#define IC_TFLITE_WEIGHT_NASNET_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_nasnet_224x224.tflite"
+#define IC_LABEL_MNASNET_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_mnasnet_label.txt"
+#define IC_TFLITE_WEIGHT_MNASNET_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_mnasnet_224x224.tflite"
+#define IC_LABEL_RESNET_V2_299_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_resnet_v2_label.txt"
+#define IC_TFLITE_WEIGHT_RESNET_V2_299_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_resnet_v2_299x299.tflite"
+#define IC_LABEL_SQUEEZENET_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_squeezenet_label.txt"
+#define IC_TFLITE_WEIGHT_SQUEEZENET_224_PATH \
+	"/usr/share/capi-media-vision/models/IC/tflite/ic_squeezenet_224x224.tflite"
+
+#define IC_VIVANTE_LABEL_PATH \
+	"/usr/share/capi-media-vision/models/IC/vivante/ic_label.txt"
+#define IC_VIVANTE_WEIGHT_PATH \
+	"/usr/share/capi-media-vision/models/IC/vivante/ic_vivante_model.nb"
+#define IC_VIVANTE_CONFIG_PATH \
+	"/usr/share/capi-media-vision/models/IC/vivante/ic_vivante_model.so"
+
+#define IC_OPENCV_LABEL_CAFFE_PATH \
+	"/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_label_squeezenet.txt"
+#define IC_OPENCV_WEIGHT_CAFFE_PATH \
+	"/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.caffemodel"
+#define IC_OPENCV_CONFIG_CAFFE_PATH \
+	"/usr/share/capi-media-vision/models/IC/caffe/ic_caffe_model_squeezenet.prototxt"
+
+//Object Detection
+#define OD_LABEL_PATH \
+	"/usr/share/capi-media-vision/models/OD/tflite/od_label.txt"
+#define OD_TFLITE_WEIGHT_PATH \
+	"/usr/share/capi-media-vision/models/OD/tflite/od_tflite_model.tflite"
+
+#define OD_OPENCV_LABEL_CAFFE_PATH \
+	"/usr/share/capi-media-vision/models/OD/caffe/od_caffe_label_mobilenetv1ssd.txt"
+#define OD_OPENCV_WEIGHT_CAFFE_PATH \
+	"/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.caffemodel"
+#define OD_OPENCV_CONFIG_CAFFE_PATH \
+	"/usr/share/capi-media-vision/models/OD/caffe/od_caffe_model_mobilenetv1ssd.prototxt"
+
+/*
+ * Hosted models
+ */
+#define OD_LABEL_MOBILENET_V1_SSD_300_PATH \
+	"/usr/share/capi-media-vision/models/OD/tflite/od_mobilenet_v1_ssd_postop_label.txt"
+#define OD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH \
+	"/usr/share/capi-media-vision/models/OD/tflite/od_mobilenet_v1_ssd_postop_300x300.tflite"
+#define OD_LABEL_MOBILENET_V2_SSD_320_PATH \
+	"/usr/share/capi-media-vision/models/OD/tflite/od_mobilenet_v2_ssd_label.txt"
+#define OD_TFLITE_WEIGHT_MOBILENET_V2_SSD_320_PATH \
+	"/usr/share/capi-media-vision/models/OD/tflite/od_mobilenet_v2_ssd_320x320.tflite"
+
+//Face Detection
+#define FD_TFLITE_WEIGHT_PATH \
+	"/usr/share/capi-media-vision/models/FD/tflite/fd_tflite_model1.tflite"
+
+#define FD_OPENCV_WEIGHT_CAFFE_PATH \
+	"/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.caffemodel"
+#define FD_OPENCV_CONFIG_CAFFE_PATH \
+	"/usr/share/capi-media-vision/models/FD/caffe/fd_caffe_model_resnet10ssd.prototxt"
+
+//Facial Landmark Detection
+#define FLD_TFLITE_WEIGHT_PATH \
+	"/usr/share/capi-media-vision/models/FLD/tflite/fld_tflite_model1.tflite"
+
+#define FLD_OPENCV_WEIGHT_CAFFE_PATH \
+	"/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.caffemodel"
+#define FLD_OPENCV_CONFIG_CAFFE_PATH \
+	"/usr/share/capi-media-vision/models/FLD/caffe/fld_caffe_model_tweak.prototxt"
+
+//Pose Detection
+#define PLD_TFLITE_WEIGHT_PATH \
+	"/usr/share/capi-media-vision/models/PLD/tflite/pld-tflite-001.tflite"
+#define PLD_POSE_LABEL_PATH \
+	"/usr/share/capi-media-vision/models/PLD/tflite/pose-label.txt"
+#define PLD_MOTION_CAPTURE_FILE_PATH \
+	"/usr/share/capi-media-vision/models/PLD/mocap/example.bvh"
+#define PLD_MOTION_CAPTURE_MAPPING_FILE_PATH \
+	"/usr/share/capi-media-vision/models/PLD/mocap/example-mocap-mapping.txt"
+
+/******
+ * Public models:
+ * IC: mobilenet caffe, tf?
+ * OD: mobilenetv1-ssd caffe, tf?
+ * FD: caffe, tf
+ * FLD: caffe, tf
+ * PD: cpm model, tf and tflite.
+ * link: https://github.com/edvardHua/PoseEstimationForMobile/tree/master/release/cpm_model
+ * P.S. Media Vision supports cpm and hourglass models for pose estimation for now.
+ */
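Two attributes recur in every engine configuration below: MV_INFERENCE_MODEL_MEAN_VALUE and MV_INFERENCE_MODEL_STD_VALUE. Consistent with the values the suite uses (127.5/127.5 for float models, 0.0/1.0 for the quantized and NPU paths), they read as the usual per-pixel normalization (x - mean) / std applied before the tensor reaches the network. A small sketch of that arithmetic, under the assumption that this is indeed the backend's interpretation:

    /* (x - mean) / std: 127.5/127.5 maps [0, 255] to [-1, 1];
     * 0.0/1.0 is the identity used for quantized uint8 inputs. */
    static inline float normalize_pixel(unsigned char x, double mean, double std)
    {
        return (float) (((double) x - mean) / std);
    }

    /* normalize_pixel(255, 127.5, 127.5) ==  1.0f
     * normalize_pixel(  0, 127.5, 127.5) == -1.0f
     * normalize_pixel(200,   0.0,   1.0) ==  200.0f */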
+
+#define NANO_PER_SEC ((__clock_t) 1000000000)
+#define NANO_PER_MILLI ((__clock_t) 1000000)
+#define MILLI_PER_SEC ((__clock_t) 1000)
+
+/* Millisecond timing helpers used to report elapsed inference time. */
+struct timespec diff(struct timespec start, struct timespec end)
+{
+    struct timespec temp;
+    if ((end.tv_nsec - start.tv_nsec) < 0) {
+        temp.tv_sec = end.tv_sec - start.tv_sec - 1;
+        temp.tv_nsec = NANO_PER_SEC + end.tv_nsec - start.tv_nsec;
+    } else {
+        temp.tv_sec = end.tv_sec - start.tv_sec;
+        temp.tv_nsec = end.tv_nsec - start.tv_nsec;
+    }
+    return temp;
+}
+
+unsigned long gettotalmillisec(const struct timespec time)
+{
+    return time.tv_sec * MILLI_PER_SEC + time.tv_nsec / NANO_PER_MILLI;
+}
+
+void _object_detected_cb(mv_source_h source, const int number_of_objects,
+                         const int *indices, const char **names,
+                         const float *confidences,
+                         const mv_rectangle_s *locations, void *user_data)
+{
+    printf("In callback: %d objects\n", number_of_objects);
+
+    for (int n = 0; n < number_of_objects; n++) {
+        printf("%2d\n", indices[n]);
+        printf("%s\n", names[n]);
+        printf("%.3f\n", confidences[n]);
+        printf("%d,%d,%d,%d\n", locations[n].point.x, locations[n].point.y,
+               locations[n].width, locations[n].height);
+    }
+}
+
+void _face_detected_cb(mv_source_h source, const int number_of_faces,
+                       const float *confidences,
+                       const mv_rectangle_s *locations, void *user_data)
+{
+    printf("In callback: %d faces\n", number_of_faces);
+
+    for (int n = 0; n < number_of_faces; n++) {
+        printf("%.3f\n", confidences[n]);
+        printf("%d,%d,%d,%d\n", locations[n].point.x, locations[n].point.y,
+               locations[n].width, locations[n].height);
+    }
+}
+
+void _facial_landmark_detected_cb(mv_source_h source,
+                                  const int number_of_landmarks,
+                                  const mv_point_s *locations, void *user_data)
+{
+    printf("In callback, %d landmarks\n", number_of_landmarks);
+    for (int n = 0; n < number_of_landmarks; n++) {
+        printf("%d: x[%d], y[%d]\n", n, locations[n].x, locations[n].y);
+    }
+}
+
+void _pose_landmark_detected_cb(mv_source_h source,
+                                mv_inference_pose_result_h pose, void *user_data)
+{
+    int cb_number_of_poses = 0;
+    int cb_number_of_landmarks = 0;
+    mv_inference_pose_get_number_of_poses(pose, &cb_number_of_poses);
+    mv_inference_pose_get_number_of_landmarks(pose, &cb_number_of_landmarks);
+    printf("%d pose(s) with %d landmarks\n", cb_number_of_poses,
+           cb_number_of_landmarks);
+
+    mv_point_s point;
+    float score;
+    for (int k = 0; k < cb_number_of_poses; ++k)
+        for (int n = 0; n < cb_number_of_landmarks; n++) {
+            mv_inference_pose_get_landmark(pose, k, n, &point, &score);
+            printf("%d-%d: x[%d], y[%d] with %.4f\n", k, n, point.x, point.y,
+                   score);
+        }
+
+    /* Compare the detected pose against the bvh motion-capture reference,
+     * restricted to the left and right leg parts. */
+    mv_pose_h poser;
+    float poseScore;
+    int ret = mv_pose_create(&poser);
+    if (ret != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create mv_pose_h.\n");
+        return;
+    }
+
+    ret = mv_pose_set_from_file(poser,
+                                PLD_MOTION_CAPTURE_FILE_PATH,
+                                PLD_MOTION_CAPTURE_MAPPING_FILE_PATH);
+    if (ret != MEDIA_VISION_ERROR_NONE) {
+        mv_pose_destroy(poser);
+        printf("Fail to set pose from file.\n");
+        return;
+    }
+
+    ret = mv_pose_compare(poser, pose,
+                          (MV_INFERENCE_HUMAN_BODY_PART_LEG_LEFT |
+                           MV_INFERENCE_HUMAN_BODY_PART_LEG_RIGHT),
+                          &poseScore);
+    if (ret != MEDIA_VISION_ERROR_NONE) {
+        mv_pose_destroy(poser);
+        printf("Fail to compare poses.\n");
+        return;
+    }
+
+    printf("[Leg]:Left&Right - poseScore:[%1.4f]\n", poseScore);
+
+    ret = mv_pose_destroy(poser);
+    if (ret != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to destroy mv_pose_h, but keep going.\n");
+    }
+
+    return;
+}
+
+void _image_classified_cb(mv_source_h source, const int number_of_classes,
+                          const int *indices, const char **names,
+                          const float *confidences, void *user_data)
+{
+    printf("In callback: %d classes\n", number_of_classes);
+
+    for (int n = 0; n < number_of_classes; ++n) {
+        printf("%2d\n", indices[n]);
+        printf("%s\n", names[n]);
+        printf("%.3f\n", confidences[n]);
+    }
+}
+
+int show_menu(const char *title, const int *options, const char **names,
+              int cnt)
+{
+    printf("*********************************************\n");
+    printf("* %38s *\n", title);
+    printf("*-------------------------------------------*\n");
+    int i = 0;
+    for (i = 0; i < cnt; ++i)
+        printf("* %2i. %34s *\n", options[i], names[i]);
+
+    printf("*********************************************\n\n");
+    int selection = 0;
+    printf("Your choice: ");
+    if (scanf("%20i", &selection) == 0) {
+        /* Non-numeric input: discard the rest of the line so the next
+         * prompt starts clean; a non-zero return here means EOF/error. */
+        if (scanf("%*[^\n]%*c") != 0) {
+            printf("ERROR: Reading the input line error.\n");
+            return -1;
+        }
+        printf("ERROR: Incorrect input.\n");
+    }
+
+    return selection;
+}
+
+int perform_configure_set_model_config_path(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    char *filePath = NULL;
+    while (-1 == input_string("Model configuration file path:", FILE_PATH_SIZE,
+                              &(filePath))) {
+        printf("Incorrect file path! Try again.\n");
+    }
+
+    err = mv_engine_config_set_string_attribute(
+            engine_cfg, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, filePath);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to set model configuration file path: %s\n", filePath);
+    }
+
+    free(filePath);
+    filePath = NULL;
+
+    return err;
+}
+
+int perform_configure_set_model_weights_path(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    char *filePath = NULL;
+    while (-1 == input_string("Model weights file path:", FILE_PATH_SIZE,
+                              &(filePath))) {
+        printf("Incorrect file path! Try again.\n");
+    }
+
+    err = mv_engine_config_set_string_attribute(
+            engine_cfg, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, filePath);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to set model weights file path: %s\n", filePath);
+    }
+
+    free(filePath);
+    filePath = NULL;
+
+    return err;
+}
+
+int perform_configure_set_input_data_type(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    int dataType = 0;
+    while (-1 == input_int("Input Tensor Data Type:", 0, 4, &dataType)) {
+        printf("Invalid type! Try again.\n");
+    }
+
+    err = mv_engine_config_set_int_attribute(
+            engine_cfg, MV_INFERENCE_INPUT_DATA_TYPE,
+            (mv_inference_data_type_e) dataType);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to set input tensor data type: %d\n", dataType);
+    }
+
+    return err;
+}
+
+int perform_configure_set_model_userfile_path(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    char *filePath = NULL;
+    while (-1 == input_string("Model user file (category list) path:",
+                              FILE_PATH_SIZE, &(filePath))) {
+        printf("Incorrect file path! Try again.\n");
+    }
+
+    err = mv_engine_config_set_string_attribute(
+            engine_cfg, MV_INFERENCE_MODEL_USER_FILE_PATH, filePath);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to set model user file path: %s\n", filePath);
+    }
+
+    free(filePath);
+    filePath = NULL;
+
+    return err;
+}
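The remaining setters repeat the same prompt/validate/set pattern, one attribute per function. Purely as an illustration of that pattern, here is a hypothetical table-driven variant of the int-valued setters; it is not part of the suite, and input_int() plus ARRAY_SIZE come from the suite itself:

    /* Hypothetical consolidation of the per-attribute int setters below.
     * The engine-config attribute names are string keys in this API. */
    struct int_attr {
        const char *prompt;
        const char *name;
        int min, max;
    };

    static int configure_input_tensor(mv_engine_config_h cfg)
    {
        static const struct int_attr attrs[] = {
            { "Tensor Width:", MV_INFERENCE_INPUT_TENSOR_WIDTH, INT_MIN, INT_MAX },
            { "Tensor Height:", MV_INFERENCE_INPUT_TENSOR_HEIGHT, INT_MIN, INT_MAX },
            { "Tensor Channels:", MV_INFERENCE_INPUT_TENSOR_CHANNELS, INT_MIN, INT_MAX },
        };
        for (unsigned int i = 0; i < ARRAY_SIZE(attrs); ++i) {
            int value = 0;
            while (input_int(attrs[i].prompt, attrs[i].min, attrs[i].max,
                             &value) == -1)
                printf("Invalid value! Try again.\n");
            int err = mv_engine_config_set_int_attribute(cfg, attrs[i].name,
                                                         value);
            if (err != MEDIA_VISION_ERROR_NONE)
                return err;
        }
        return MEDIA_VISION_ERROR_NONE;
    }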
+
+int perform_configure_set_model_mean_value(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    double meanValue = 0.0;
+    while (-1 == input_double("mean value:", 0.0, 255.0, &meanValue)) {
+        printf("Invalid value! Try again.\n");
+    }
+
+    err = mv_engine_config_set_double_attribute(
+            engine_cfg, MV_INFERENCE_MODEL_MEAN_VALUE, meanValue);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to set model mean value: %f\n", meanValue);
+    }
+
+    return err;
+}
+
+int perform_configure_set_image_scale(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    double stdValue = 0.0;
+    while (-1 == input_double("Image scale factor:", 1.0, 255.0, &stdValue)) {
+        printf("Invalid value! Try again.\n");
+    }
+
+    err = mv_engine_config_set_double_attribute(
+            engine_cfg, MV_INFERENCE_MODEL_STD_VALUE, stdValue);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to set std value: %lf\n", stdValue);
+    }
+
+    return err;
+}
+
+int perform_configure_set_confidence_threshold(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    double threshold = 0.0;
+    while (-1 == input_double("threshold:", 0.0, 1.0, &threshold)) {
+        printf("Invalid value! Try again.\n");
+    }
+
+    err = mv_engine_config_set_double_attribute(
+            engine_cfg, MV_INFERENCE_CONFIDENCE_THRESHOLD, threshold);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to set confidence threshold: %lf\n", threshold);
+    }
+
+    return err;
+}
+
+int perform_configure_set_backend(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    int backendType = 0;
+    while (-1 == input_int("Backend Type:", 1, 3, &backendType)) {
+        printf("Invalid type! Try again.\n");
+    }
+
+    err = mv_engine_config_set_int_attribute(
+            engine_cfg, MV_INFERENCE_BACKEND_TYPE,
+            (mv_inference_backend_type_e) backendType);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to set backend type: %d\n", backendType);
+    }
+
+    return err;
+}
+
+int perform_configure_set_target(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    int targetType = 0;
+    while (-1 == input_int("Target Type:", 1, 2, &targetType)) {
+        printf("Invalid type! Try again.\n");
+    }
+
+    err = mv_engine_config_set_int_attribute(
+            engine_cfg, MV_INFERENCE_TARGET_TYPE,
+            (mv_inference_target_type_e) targetType);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to set target type: %d\n", targetType);
+    }
+
+    return err;
+}
+
+int perform_configure_set_tensor_width(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    int tensorW = 0;
+    while (-1 == input_int("Tensor Width:", INT_MIN, INT_MAX, &tensorW)) {
+        printf("Invalid value! Try again.\n");
+    }
+
+    err = mv_engine_config_set_int_attribute(
+            engine_cfg, MV_INFERENCE_INPUT_TENSOR_WIDTH, tensorW);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to set tensor width: %d\n", tensorW);
+    }
+
+    return err;
+}
+
+int perform_configure_set_tensor_height(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    int tensorH = 0;
+    while (-1 == input_int("Tensor Height:", INT_MIN, INT_MAX, &tensorH)) {
+        printf("Invalid value! Try again.\n");
+    }
+
+    err = mv_engine_config_set_int_attribute(
+            engine_cfg, MV_INFERENCE_INPUT_TENSOR_HEIGHT, tensorH);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to set tensor height: %d\n", tensorH);
+    }
+
+    return err;
+}
+
+int perform_configure_set_tensor_channels(mv_engine_config_h engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+    int tensorC = 0;
+    while (-1 == input_int("Tensor Channels:", INT_MIN, INT_MAX, &tensorC)) {
+        printf("Invalid value! 
Try again.\n"); + } + + err = mv_engine_config_set_int_attribute( + engine_cfg, MV_INFERENCE_INPUT_TENSOR_CHANNELS, tensorC); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to set tensor channels: %d\n", tensorC); + } + + return err; +} + +int perform_configuration(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + int sel_opt = 0; + const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }; + const char *names[] = { "Set Model Configuration", + "Set Model Weights", + "Set Model Data Type", + "Set Model UserFile", + "Set Model MeanFile", + "Set Image Scale", + "Set Confidence Threshold", + "Set Backend", + "Set Target", + "Set InputTensor Width", + "Set InputTensor Height", + "Set InputTensor Channels", + "Back" }; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + while (sel_opt == 0) { + sel_opt = show_menu("Select Actions: ", options, names, + ARRAY_SIZE(options)); + switch (sel_opt) { + case 1: + err = perform_configure_set_model_config_path(handle); + sel_opt = 0; + break; + case 2: + err = perform_configure_set_model_weights_path(handle); + sel_opt = 0; + break; + case 3: + err = perform_configure_set_input_data_type(handle); + sel_opt = 0; + break; + case 4: + err = perform_configure_set_model_userfile_path(handle); + sel_opt = 0; + break; + case 5: + err = perform_configure_set_model_mean_value(handle); + sel_opt = 0; + break; + case 6: + err = perform_configure_set_image_scale(handle); + sel_opt = 0; + break; + case 7: + err = perform_configure_set_confidence_threshold(handle); + sel_opt = 0; + break; + case 8: + err = perform_configure_set_backend(handle); + sel_opt = 0; + break; + case 9: + err = perform_configure_set_target(handle); + sel_opt = 0; + break; + case 10: + err = perform_configure_set_tensor_width(handle); + sel_opt = 0; + break; + case 11: + err = perform_configure_set_tensor_height(handle); + sel_opt = 0; + break; + case 12: + err = perform_configure_set_tensor_channels(handle); + sel_opt = 0; + break; + case 13: + err = MEDIA_VISION_ERROR_NONE; + break; + default: + printf("Invalid option.\n"); + sel_opt = 0; + } + } + + *engine_cfg = handle; + return err; +} + +int perform_tflite_mobilenetv1_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + const char *inputNodeName = "input_2"; + const char *outputNodeName[] = { "dense_3/Softmax" }; + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, IC_TFLITE_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, IC_LABEL_PATH); + + mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, + 127.0); + + mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, + 
127.0); + + mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.6); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, + 224); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 224); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); + + mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); + + *engine_cfg = handle; + return err; +} + +int perform_armnn_mobilenetv1_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + const char *inputNodeName = "input_2"; + const char *outputNodeName[] = { "dense_3/Softmax" }; + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, IC_TFLITE_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, IC_LABEL_PATH); + + mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, + 127.0); + + mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, + 127.0); + + mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.6); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_ARMNN); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, + 224); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 224); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); + + mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); + + *engine_cfg = handle; + return err; +} + +int perform_one_mobilenetv1_quant_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + const char *inputNodeName = "input"; + const char *outputNodeName[] = { "MobilenetV1/Predictions/Reshape_1" }; + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, IC_Q_TFLITE_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_UINT8); + + 
mv_engine_config_set_string_attribute(
+            handle, MV_INFERENCE_MODEL_USER_FILE_PATH, IC_Q_LABEL_PATH);
+
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE,
+                                          0.0);
+
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE,
+                                          1.0);
+
+    mv_engine_config_set_double_attribute(
+            handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.6);
+
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE,
+                                       MV_INFERENCE_BACKEND_ONE);
+
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE,
+                                       MV_INFERENCE_TARGET_CPU);
+
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                                       224);
+
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                                       224);
+
+    mv_engine_config_set_int_attribute(handle,
+                                       MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME,
+                                          inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(
+            handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_vivante_inceptionv3_config(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    const char *inputNodeName = "input";
+    /* NOTE: assumes the standard InceptionV3 output node name; the original
+     * read "Peshape_1", which looks like a typo for "Reshape_1". */
+    const char *outputNodeName[] = { "InceptionV3/Predictions/Reshape_1" };
+
+    mv_engine_config_set_string_attribute(handle,
+                                          MV_INFERENCE_MODEL_WEIGHT_FILE_PATH,
+                                          IC_VIVANTE_WEIGHT_PATH);
+
+    mv_engine_config_set_string_attribute(
+            handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH,
+            IC_VIVANTE_CONFIG_PATH);
+
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE,
+                                       MV_INFERENCE_DATA_UINT8);
+
+    mv_engine_config_set_string_attribute(
+            handle, MV_INFERENCE_MODEL_USER_FILE_PATH, IC_VIVANTE_LABEL_PATH);
+
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE,
+                                          0.0);
+
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE,
+                                          1.0);
+
+    mv_engine_config_set_double_attribute(
+            handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.6);
+
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE,
+                                       MV_INFERENCE_BACKEND_MLAPI);
+
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_DEVICE_TYPE,
+                                       MV_INFERENCE_TARGET_DEVICE_CUSTOM);
+
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH,
+                                       299);
+
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT,
+                                       299);
+
+    mv_engine_config_set_int_attribute(handle,
+                                       MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME,
+                                          inputNodeName);
+
+    mv_engine_config_set_array_string_attribute(
+            handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_opencv_caffe_squeezenet_config(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) 
{ + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + const char *inputNodeName = "data"; + const char *outputNodeName[] = { "prob" }; + + mv_engine_config_set_string_attribute(handle, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + IC_OPENCV_WEIGHT_CAFFE_PATH); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, + IC_OPENCV_CONFIG_CAFFE_PATH); + + mv_engine_config_set_string_attribute(handle, + MV_INFERENCE_MODEL_USER_FILE_PATH, + IC_OPENCV_LABEL_CAFFE_PATH); + + mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_double_attribute( + handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_OPENCV); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, + 227); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, + 227); + + mv_engine_config_set_int_attribute(handle, + MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3); + + mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, + inputNodeName); + + mv_engine_config_set_array_string_attribute( + handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1); + + *engine_cfg = handle; + return err; +} + + +int perform_hosted_tflite_mobilenetv1_224_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + IC_TFLITE_WEIGHT_MOBILENET_V1_224_PATH); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, + IC_LABEL_MOBILENET_V1_224_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_mobilenetv2_224_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + IC_TFLITE_WEIGHT_MOBILENET_V2_224_PATH); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, + IC_LABEL_MOBILENET_V2_224_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, 
MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_densenet_224_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + IC_TFLITE_WEIGHT_DENSENET_224_PATH); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, + IC_LABEL_DENSENET_224_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_inception_resnet_299_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + IC_TFLITE_WEIGHT_INCEPTION_RESENET_299_PATH); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, + IC_LABEL_INCEPTION_RESENET_299_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_inception_v3_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + IC_TFLITE_WEIGHT_INCEPTION_V3_299_PATH); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, + IC_LABEL_INCEPTION_V3_299_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_inception_v4_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + 
} + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + IC_TFLITE_WEIGHT_INCEPTION_V4_299_PATH); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, + IC_LABEL_INCEPTION_V4_299_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_nasnet_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + IC_TFLITE_WEIGHT_NASNET_224_PATH); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, + IC_LABEL_NASNET_224_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_mnasnet_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + IC_TFLITE_WEIGHT_MNASNET_224_PATH); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, + IC_LABEL_MNASNET_224_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_resnet_config(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + IC_TFLITE_WEIGHT_RESNET_V2_299_PATH); + + mv_engine_config_set_string_attribute( + handle, MV_INFERENCE_MODEL_USER_FILE_PATH, + IC_LABEL_RESNET_V2_299_PATH); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute( + handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + *engine_cfg = handle; + return err; +} + +int perform_hosted_tflite_squeezenet_config(mv_engine_config_h *engine_cfg) +{ 
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, IC_TFLITE_WEIGHT_SQUEEZENET_224_PATH);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_USER_FILE_PATH, IC_LABEL_SQUEEZENET_224_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_image_classification()
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    int sel_opt = 0;
+    const int options[] = { 1, 2, 3, 4, 5, 6,
+                7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                17, 18, 19 };
+    const char *names[] = { "Configuration",
+                "TFLite(cpu + Mobilenet)",
+                "OpenCV(cpu + Squeezenet)",
+                "ARMNN(cpu + Mobilenet)",
+                "ONE(cpu + Mobilenet_Q)",
+                "Vivante(NPU + Inceptionv3)",
+                "Hosted: TFLite(cpu + Mobilenet V1)", // 7
+                "Hosted: TFLite(cpu + Mobilenet V2)",
+                "Hosted: TFLite(cpu + Densenet)",
+                "Hosted: TFLite(cpu + Inception Resnet)",
+                "Hosted: TFLite(cpu + Inception V3)",
+                "Hosted: TFLite(cpu + Inception V4)",
+                "Hosted: TFLite(cpu + Nasnet)",
+                "Hosted: TFLite(cpu + Mnasnet)",
+                "Hosted: TFLite(cpu + Resnet)",
+                "Hosted: TFLite(cpu + Squeezenet)", // 16
+                "Prepare",
+                "Run",
+                "Back" };
+
+    mv_engine_config_h engine_cfg = NULL;
+    mv_inference_h infer = NULL;
+    mv_source_h mvSource = NULL;
+
+    while (sel_opt == 0) {
+        sel_opt = show_menu("Select Action:", options, names,
+                ARRAY_SIZE(options));
+        switch (sel_opt) {
+        case 1: {
+            // perform configuration
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_configuration(&engine_cfg);
+        } break;
+        case 2: {
+            // perform TFLite
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_tflite_mobilenetv1_config(&engine_cfg);
+        } break;
+        case 3: {
+            // perform OpenCV
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_opencv_caffe_squeezenet_config(&engine_cfg);
+        } break;
+        case 4: {
+            // perform ARMNN
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_armnn_mobilenetv1_config(&engine_cfg);
+        } break;
+        case 5: {
+            // perform ONE(On-device Neural Engine)
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_one_mobilenetv1_quant_config(&engine_cfg);
+        } break;
+        case 6: {
+            // perform Vivante
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_vivante_inceptionv3_config(&engine_cfg);
+        } break;
+        case 7: {
+            // perform hosted mobilenetv1
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_mobilenetv1_224_config(&engine_cfg);
+        } break;
+        case 8: {
+            // perform hosted mobilenetv2
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_mobilenetv2_224_config(&engine_cfg);
+        } break;
+        case 9: {
+            // perform hosted densenet
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_densenet_224_config(&engine_cfg);
+        } break;
+        case 10: {
+            // perform hosted inception resnet
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_inception_resnet_299_config(&engine_cfg);
+        } break;
+        case 11: {
+            // perform hosted inception v3
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_inception_v3_config(&engine_cfg);
+        } break;
+        case 12: {
+            // perform hosted inception v4
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_inception_v4_config(&engine_cfg);
+        } break;
+        case 13: {
+            // perform hosted nasnet
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_nasnet_config(&engine_cfg);
+        } break;
+        case 14: {
+            // perform hosted mnasnet
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_mnasnet_config(&engine_cfg);
+        } break;
+        case 15: {
+            // perform hosted resnet
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_resnet_config(&engine_cfg);
+        } break;
+        case 16: {
+            // perform hosted squeezenet
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_squeezenet_config(&engine_cfg);
+        } break;
+        case 17: {
+            // create - configure - prepare
+            if (infer) {
+                int err2 = mv_inference_destroy(infer);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy inference handle [err:%i]\n", err2);
+                infer = NULL;
+            }
+
+            // create inference handle
+            err = mv_inference_create(&infer);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to create inference handle [err:%i]\n", err);
+                break;
+            }
+
+            // configure
+            err = mv_inference_configure(infer, engine_cfg);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to configure inference handle [err:%i]\n", err);
+                break;
+            }
+
+            // prepare
+            err = mv_inference_prepare(infer);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to prepare inference handle [err:%i]\n", err);
+                break;
+            }
+        } break;
+        case 18: {
+            if (mvSource) {
+                int err2 = mv_destroy_source(mvSource);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource.\n");
+                mvSource = NULL;
+            }
+
+            char *in_file_name = NULL;
+            /* Load media source */
+            while (input_string("Input file name to be inferred:", 1024,
+                    &(in_file_name)) == -1)
+                printf("Incorrect input! Try again.\n");
+
+            err = mv_create_source(&mvSource);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to create mvSource.\n");
+                free(in_file_name);
+                break;
+            }
+
+            err = load_mv_source_from_file(in_file_name, mvSource);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                int err2 = mv_destroy_source(mvSource);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource. error code:%i\n", err2);
+                mvSource = NULL;
+                free(in_file_name);
+                break;
+            }
+            free(in_file_name);
+
+            struct timespec s_tspec;
+            struct timespec e_tspec;
+
+            clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+            // Run
+            err = mv_inference_image_classify(mvSource, infer, NULL,
+                    _image_classified_cb, NULL);
+
+            clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+            struct timespec diffspec = diff(s_tspec, e_tspec);
+            unsigned long timeDiff = gettotalmillisec(diffspec);
+            printf("elapsed time : %lu(ms)\n", timeDiff);
+        } break;
+        case 19: {
+            // perform destroy
+            if (engine_cfg) {
+                err = mv_destroy_engine_config(engine_cfg);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err);
+                engine_cfg = NULL;
+            }
+
+            if (infer) {
+                err = mv_inference_destroy(infer);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy inference handle [err:%i]\n", err);
+                infer = NULL;
+            }
+
+            if (mvSource) {
+                err = mv_destroy_source(mvSource);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource [err:%i]\n", err);
+                mvSource = NULL;
+            }
+        } break;
+        default:
+            printf("Invalid option.\n");
+            sel_opt = 0;
+            continue;
+        }
+
+        int do_another = 0;
+        if (err != MEDIA_VISION_ERROR_NONE) {
+            printf("ERROR: Action is finished with error code:%i\n", err);
+        }
+
+        sel_opt = 0;
+        const int options_last[2] = { 1, 2 };
+        const char *names_last[2] = { "Yes", "No" };
+
+        while (sel_opt == 0) {
+            sel_opt = show_menu("Run Image Classification again?: ", options_last,
+                    names_last, ARRAY_SIZE(options_last));
+            switch (sel_opt) {
+            case 1:
+                do_another = 1;
+                break;
+            case 2:
+                do_another = 0;
+                break;
+            default:
+                printf("Invalid option.\n");
+                sel_opt = 0;
+            }
+        }
+
+        sel_opt = (do_another == 1) ? 0 : 1;
+    }
+
+    if (engine_cfg) {
+        err = mv_destroy_engine_config(engine_cfg);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy engine_cfg [err:%i]\n", err);
+        engine_cfg = NULL;
+    }
+
+    if (infer) {
+        err = mv_inference_destroy(infer);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy inference handle [err:%i]\n", err);
+        infer = NULL;
+    }
+
+    if (mvSource) {
+        err = mv_destroy_source(mvSource);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy mvSource [err:%i]\n", err);
+        mvSource = NULL;
+    }
+
+    return MEDIA_VISION_ERROR_NONE;
+}
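+/*
+ * Every task menu below follows the same lifecycle:
+ *   1. mv_create_engine_config() + mv_engine_config_set_*_attribute()
+ *   2. mv_inference_create() -> mv_inference_configure() -> mv_inference_prepare()
+ *   3. mv_inference_<task>() with a result callback
+ *   4. destroy of the source, inference and engine-config handles
+ */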
+/*
+ *
+ * Object Detection
+ *
+ */
+int perform_tflite_mobilenetv1ssd_config(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    const char *inputNodeName = "normalized_input_image_tensor";
+    const char *outputNodeName[] = { "TFLite_Detection_PostProcess",
+                     "TFLite_Detection_PostProcess:1",
+                     "TFLite_Detection_PostProcess:2",
+                     "TFLite_Detection_PostProcess:3" };
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, OD_TFLITE_WEIGHT_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_USER_FILE_PATH, OD_LABEL_PATH);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName);
+    mv_engine_config_set_array_string_attribute(handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_opencv_mobilenetv1ssd_config(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    const char *inputNodeName = "data";
+    const char *outputNodeName[1] = { "detection_out" };
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, OD_OPENCV_WEIGHT_CAFFE_PATH);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, OD_OPENCV_CONFIG_CAFFE_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_USER_FILE_PATH, OD_OPENCV_LABEL_CAFFE_PATH);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_OPENCV);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName);
+    mv_engine_config_set_array_string_attribute(handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_armnn_mobilenetv1ssd_config(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    const char *inputNodeName = "normalized_input_image_tensor";
+    const char *outputNodeName[] = { "TFLite_Detection_PostProcess",
+                     "TFLite_Detection_PostProcess:1",
+                     "TFLite_Detection_PostProcess:2",
+                     "TFLite_Detection_PostProcess:3" };
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, OD_TFLITE_WEIGHT_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_USER_FILE_PATH, OD_LABEL_PATH);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_ARMNN);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName);
+    mv_engine_config_set_array_string_attribute(handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4);
+
+    *engine_cfg = handle;
+    return err;
+}
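+/*
+ * The four TFLite_Detection_PostProcess output tensors requested above are
+ * the standard TFLite SSD post-process outputs (bounding boxes, class ids,
+ * scores and the number of detections, in that order).
+ */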
+int perform_hosted_tflite_mobilenetv1ssd_300_config(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, OD_TFLITE_WEIGHT_MOBILENET_V1_SSD_300_PATH);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_USER_FILE_PATH, OD_LABEL_MOBILENET_V1_SSD_300_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_hosted_tflite_mobilenetv2ssd_320_config(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, OD_TFLITE_WEIGHT_MOBILENET_V2_SSD_320_PATH);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_USER_FILE_PATH, OD_LABEL_MOBILENET_V2_SSD_320_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_object_detection()
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    int sel_opt = 0;
+    const int options[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+    const char *names[] = { "Configuration",
+                "TFLITE(CPU) + MobileNetV1+SSD",
+                "OPENCV(CPU) + MobileNetV1+SSD",
+                "ARMNN(CPU) + MobileNetV1+SSD",
+                "Hosted: TFLite(cpu + MobilenetV1+SSD)",
+                "Hosted: TFLite(cpu + MobilenetV2+SSD)",
+                "Prepare",
+                "Run",
+                "Back" };
+
+    mv_engine_config_h engine_cfg = NULL;
+    mv_inference_h infer = NULL;
+    mv_source_h mvSource = NULL;
+
+    while (sel_opt == 0) {
+        sel_opt = show_menu("Select Action:", options, names,
+                ARRAY_SIZE(options));
+        switch (sel_opt) {
+        case 1: {
+            // perform configuration
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_configuration(&engine_cfg);
+        } break;
+        case 2: {
+            // perform TFLite MobileNetV1+SSD config
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_tflite_mobilenetv1ssd_config(&engine_cfg);
+        } break;
+        case 3: {
+            // perform OpenCV MobileNetV1+SSD config
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_opencv_mobilenetv1ssd_config(&engine_cfg);
+        } break;
+        case 4: {
+            // perform ARMNN MobileNetV1+SSD config
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_armnn_mobilenetv1ssd_config(&engine_cfg);
+        } break;
+        case 5: {
+            // perform hosted mobilenet v1 + ssd
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_mobilenetv1ssd_300_config(&engine_cfg);
+        } break;
+        case 6: {
+            // perform hosted mobilenet v2 + ssd
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_hosted_tflite_mobilenetv2ssd_320_config(&engine_cfg);
+        } break;
+        case 7: {
+            // create - configure - prepare
+            if (infer) {
+                int err2 = mv_inference_destroy(infer);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy inference handle [err:%i]\n", err2);
+                infer = NULL;
+            }
+
+            // create inference handle
+            err = mv_inference_create(&infer);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to create inference handle [err:%i]\n", err);
+                break;
+            }
+
+            // configure
+            err = mv_inference_configure(infer, engine_cfg);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to configure inference handle [err:%i]\n", err);
+                break;
+            }
+
+            // prepare
+            err = mv_inference_prepare(infer);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to prepare inference handle [err:%i]\n", err);
+                break;
+            }
+        } break;
+        case 8: {
+            if (mvSource) {
+                int err2 = mv_destroy_source(mvSource);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource.\n");
+                mvSource = NULL;
+            }
+
+            char *in_file_name = NULL;
+            /* Load media source */
+            while (input_string("Input file name to be inferred:", 1024,
+                    &(in_file_name)) == -1)
+                printf("Incorrect input! Try again.\n");
+
+            err = mv_create_source(&mvSource);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to create mvSource.\n");
+                free(in_file_name);
+                break;
+            }
+
+            err = load_mv_source_from_file(in_file_name, mvSource);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                int err2 = mv_destroy_source(mvSource);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource. error code:%i\n", err2);
+                mvSource = NULL;
+                free(in_file_name);
+                break;
+            }
+            free(in_file_name);
+
+            struct timespec s_tspec;
+            struct timespec e_tspec;
+
+            clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+            // Object Detect
+            err = mv_inference_object_detect(mvSource, infer,
+                    _object_detected_cb, NULL);
+
+            clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+            struct timespec diffspec = diff(s_tspec, e_tspec);
+            unsigned long timeDiff = gettotalmillisec(diffspec);
+            printf("elapsed time : %lu(ms)\n", timeDiff);
+        } break;
+        case 9: {
+            // perform destroy
+            if (engine_cfg) {
+                err = mv_destroy_engine_config(engine_cfg);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err);
+                engine_cfg = NULL;
+            }
+
+            if (infer) {
+                err = mv_inference_destroy(infer);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy inference handle [err:%i]\n", err);
+                infer = NULL;
+            }
+
+            if (mvSource) {
+                err = mv_destroy_source(mvSource);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource [err:%i]\n", err);
+                mvSource = NULL;
+            }
+        } break;
+        default:
+            printf("Invalid option.\n");
+            sel_opt = 0;
+            continue;
+        }
+
+        int do_another = 0;
+        if (err != MEDIA_VISION_ERROR_NONE) {
+            printf("ERROR: Action is finished with error code:%i\n", err);
+        }
+
+        sel_opt = 0;
+        const int options_last[2] = { 1, 2 };
+        const char *names_last[2] = { "Yes", "No" };
+
+        while (sel_opt == 0) {
+            sel_opt = show_menu("Run Object Detection again?:", options_last,
+                    names_last, ARRAY_SIZE(options_last));
+            switch (sel_opt) {
+            case 1:
+                do_another = 1;
+                break;
+            case 2:
+                do_another = 0;
+                break;
+            default:
+                printf("Invalid option.\n");
+                sel_opt = 0;
+            }
+        }
+
+        sel_opt = (do_another == 1) ? 0 : 1;
+    }
+
+    if (engine_cfg) {
+        err = mv_destroy_engine_config(engine_cfg);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy engine_cfg [err:%i]\n", err);
+        engine_cfg = NULL;
+    }
+
+    if (infer) {
+        err = mv_inference_destroy(infer);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy inference handle [err:%i]\n", err);
+        infer = NULL;
+    }
+
+    if (mvSource) {
+        err = mv_destroy_source(mvSource);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy mvSource [err:%i]\n", err);
+        mvSource = NULL;
+    }
+
+    return MEDIA_VISION_ERROR_NONE;
+}
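+/*
+ * The Run cases above and below measure latency with CLOCK_MONOTONIC around
+ * the single mv_inference_* call; diff() and gettotalmillisec() are timing
+ * helpers defined in the earlier part of this test-suite file.
+ */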
+int perform_tflite_mobilenetv1ssd_face(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    const char *inputNodeName = "normalized_input_image_tensor";
+    const char *outputNodeName[] = { "TFLite_Detection_PostProcess",
+                     "TFLite_Detection_PostProcess:1",
+                     "TFLite_Detection_PostProcess:2",
+                     "TFLite_Detection_PostProcess:3" };
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, FD_TFLITE_WEIGHT_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName);
+    mv_engine_config_set_array_string_attribute(handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_opencv_resnet10ssd_face(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    const char *inputNodeName = "data";
+    const char *outputNodeName[] = { "detection_out" };
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, FD_OPENCV_WEIGHT_CAFFE_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, FD_OPENCV_CONFIG_CAFFE_PATH);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, 135.7);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_OPENCV);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName);
+    mv_engine_config_set_array_string_attribute(handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_armnn_mobilenetv1ssd_face(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    const char *inputNodeName = "normalized_input_image_tensor";
+    const char *outputNodeName[] = { "TFLite_Detection_PostProcess",
+                     "TFLite_Detection_PostProcess:1",
+                     "TFLite_Detection_PostProcess:2",
+                     "TFLite_Detection_PostProcess:3" };
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, FD_TFLITE_WEIGHT_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_ARMNN);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 300);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName);
+    mv_engine_config_set_array_string_attribute(handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 4);
+
+    *engine_cfg = handle;
+    return err;
+}
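+/*
+ * Unlike the classification and object-detection configs, the face-detection
+ * configs above set no MV_INFERENCE_MODEL_USER_FILE_PATH label file,
+ * presumably because face detection reports only bounding boxes and needs no
+ * class-label mapping.
+ */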
Action:", options, names, + ARRAY_SIZE(options)); + switch (sel_opt) { + case 1: { + //perform configuration + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + + err = perform_configuration(&engine_cfg); + } break; + case 2: { + //perform TF Mobilenetssd config + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + engine_cfg = NULL; + } + + err = perform_tflite_mobilenetv1ssd_face(&engine_cfg); + } break; + case 3: { + //perform TF Lite Mobilenetssd config + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + } + + err = perform_opencv_resnet10ssd_face(&engine_cfg); + } break; + case 4: { + //perform TF Lite Mobilenetssd config + if (engine_cfg) { + int err2 = mv_destroy_engine_config(engine_cfg); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err2); + } + + err = perform_armnn_mobilenetv1ssd_face(&engine_cfg); + } break; + case 5: { + // create - configure - prepare + if (infer) { + int err2 = mv_inference_destroy(infer); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy inference handle [err:%i]\n", err2); + infer = NULL; + } + + // inference + // create handle + err = mv_inference_create(&infer); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create inference handle [err:%i]\n", err); + break; + } + + //configure + err = mv_inference_configure(infer, engine_cfg); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to configure inference handle [err:%i]\n", err); + break; + } + + //prepare + err = mv_inference_prepare(infer); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to prepare inference handle"); + break; + } + } break; + case 6: { + if (mvSource) { + int err2 = mv_destroy_source(mvSource); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy mvSource\n"); + mvSource = NULL; + } + + char *in_file_name = NULL; + /* Load media source */ + while (input_string("Input file name to be inferred:", 1024, + &(in_file_name)) == -1) + printf("Incorrect input! Try again.\n"); + + err = mv_create_source(&mvSource); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create mvSource.\n"); + free(in_file_name); + break; + } + + err = load_mv_source_from_file(in_file_name, mvSource); + if (err != MEDIA_VISION_ERROR_NONE) { + int err2 = mv_destroy_source(mvSource); + if (err2 != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy mvSource. 
error code:%i\n", err2); + mvSource = NULL; + free(in_file_name); + break; + } + free(in_file_name); + + struct timespec s_tspec; + struct timespec e_tspec; + + clock_gettime(CLOCK_MONOTONIC, &s_tspec); + + // Object Detect + err = mv_inference_face_detect(mvSource, infer, _face_detected_cb, + NULL); + + clock_gettime(CLOCK_MONOTONIC, &e_tspec); + + struct timespec diffspec = diff(s_tspec, e_tspec); + unsigned long timeDiff = gettotalmillisec(diffspec); + printf("elapsed time : %lu(ms)\n", timeDiff); + } break; + case 7: { + //perform destroy + if (engine_cfg) { + err = mv_destroy_engine_config(engine_cfg); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err); + engine_cfg = NULL; + } + + if (infer) { + err = mv_inference_destroy(infer); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy inference handle [err:%i]\n", err); + infer = NULL; + } + + if (mvSource) { + err = mv_destroy_source(mvSource); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy mvSource [err:%i]\n", err); + mvSource = NULL; + } + } break; + default: + printf("Invalid option.\n"); + sel_opt = 0; + continue; + } + + int do_another = 0; + if (err != MEDIA_VISION_ERROR_NONE) { + printf("ERROR: Action is finished with error code:%i\n", err); + } + + sel_opt = 0; + const int options_last[] = { 1, 2 }; + const char *names_last[] = { "Yes", "No" }; + + while (sel_opt == 0) { + sel_opt = show_menu("Run Face Detection again?:", options_last, + names_last, ARRAY_SIZE(options_last)); + switch (sel_opt) { + case 1: + do_another = 1; + break; + case 2: + do_another = 0; + break; + default: + printf("Invalid option.\n"); + sel_opt = 0; + } + } + + sel_opt = (do_another == 1) ? 0 : 1; + } + + if (engine_cfg) { + err = mv_destroy_engine_config(engine_cfg); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy engine_cfg [err:%i]\n", err); + engine_cfg = NULL; + } + + if (infer) { + err = mv_inference_destroy(infer); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy inference handle [err:%i]\n", err); + infer = NULL; + } + + if (mvSource) { + err = mv_destroy_source(mvSource); + if (err != MEDIA_VISION_ERROR_NONE) + printf("Fail to destroy mvSource [err:%i]\n", err); + mvSource = NULL; + } + + return MEDIA_VISION_ERROR_NONE; +} + +int perform_tflite_TweakCNN(mv_engine_config_h *engine_cfg) +{ + int err = MEDIA_VISION_ERROR_NONE; + + mv_engine_config_h handle = NULL; + err = mv_create_engine_config(&handle); + if (err != MEDIA_VISION_ERROR_NONE) { + printf("Fail to create engine configuration handle.\n"); + if (handle) { + int err2 = mv_destroy_engine_config(handle); + if (err2 != MEDIA_VISION_ERROR_NONE) { + printf("Fail to destroy engine configuration.\n"); + } + } + return err; + } + + const char *inputNodeName = "INPUT_TENSOR_NAME"; + const char *outputNodeName[] = { "OUTPUT_TENSOR_NAME" }; + + mv_engine_config_set_string_attribute(handle, + MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, + FLD_TFLITE_WEIGHT_PATH); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, + MV_INFERENCE_DATA_FLOAT32); + + mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, + 0.0); + + mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, + 1.0); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, + MV_INFERENCE_BACKEND_TFLITE); + + mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, + MV_INFERENCE_TARGET_CPU); + + mv_engine_config_set_int_attribute(handle, 
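+/*
+ * Note: "INPUT_TENSOR_NAME" / "OUTPUT_TENSOR_NAME" in the TweakCNN config
+ * below look like placeholders; they must match the actual tensor names
+ * inside the model at FLD_TFLITE_WEIGHT_PATH, otherwise
+ * mv_inference_prepare() will fail.
+ */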
+int perform_tflite_TweakCNN(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    const char *inputNodeName = "INPUT_TENSOR_NAME";
+    const char *outputNodeName[] = { "OUTPUT_TENSOR_NAME" };
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, FLD_TFLITE_WEIGHT_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 128);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 128);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName);
+    mv_engine_config_set_array_string_attribute(handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_opencv_cnncascade(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    const char *inputNodeName = "data";
+    const char *outputNodeName[] = { "Sigmoid_fc2" };
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, FLD_OPENCV_WEIGHT_CAFFE_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_CONFIGURATION_FILE_PATH, FLD_OPENCV_CONFIG_CAFFE_PATH);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, 127.5);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, 127.5);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_OPENCV);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 128);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 128);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName);
+    mv_engine_config_set_array_string_attribute(handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_facial_landmark_detection()
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    int sel_opt = 0;
+    const int options[] = { 1, 2, 3, 4, 5, 6 };
+    const char *names[] = { "Configuration",
+                "Tflite(CPU) + TweakCNN",
+                "OPENCV(CPU) + CNN Cascade",
+                "Prepare",
+                "Run",
+                "Back" };
+
+    mv_engine_config_h engine_cfg = NULL;
+    mv_inference_h infer = NULL;
+    mv_source_h mvSource = NULL;
+
+    while (sel_opt == 0) {
+        sel_opt = show_menu("Select Action:", options, names,
+                ARRAY_SIZE(options));
+        switch (sel_opt) {
+        case 1: {
+            // perform configuration
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_configuration(&engine_cfg);
+        } break;
+        case 2: {
+            // perform SRID TweakCNN config
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_tflite_TweakCNN(&engine_cfg);
+        } break;
+        case 3: {
+            // perform CNN cascade config
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_opencv_cnncascade(&engine_cfg);
+        } break;
+        case 4: {
+            // create - configure - prepare
+            if (infer) {
+                int err2 = mv_inference_destroy(infer);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy inference handle [err:%i]\n", err2);
+                infer = NULL;
+            }
+
+            // create inference handle
+            err = mv_inference_create(&infer);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to create inference handle [err:%i]\n", err);
+                break;
+            }
+
+            // configure
+            err = mv_inference_configure(infer, engine_cfg);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to configure inference handle [err:%i]\n", err);
+                break;
+            }
+
+            // prepare
+            err = mv_inference_prepare(infer);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to prepare inference handle [err:%i]\n", err);
+                break;
+            }
+        } break;
+        case 5: {
+            if (mvSource) {
+                int err2 = mv_destroy_source(mvSource);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource.\n");
+                mvSource = NULL;
+            }
+
+            char *in_file_name = NULL;
+            /* Load media source */
+            while (input_string("Input file name to be inferred:", 1024,
+                    &(in_file_name)) == -1)
+                printf("Incorrect input! Try again.\n");
+
+            err = mv_create_source(&mvSource);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to create mvSource.\n");
+                free(in_file_name);
+                break;
+            }
+
+            err = load_mv_source_from_file(in_file_name, mvSource);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                int err2 = mv_destroy_source(mvSource);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource. error code:%i\n", err2);
+                mvSource = NULL;
+                free(in_file_name);
+                break;
+            }
+            free(in_file_name);
+
+            struct timespec s_tspec;
+            struct timespec e_tspec;
+
+            clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+            // Facial Landmark Detect
+            err = mv_inference_facial_landmark_detect(
+                    mvSource, infer, NULL, _facial_landmark_detected_cb, NULL);
+
+            clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+            struct timespec diffspec = diff(s_tspec, e_tspec);
+            unsigned long timeDiff = gettotalmillisec(diffspec);
+            printf("elapsed time : %lu(ms)\n", timeDiff);
+        } break;
+        case 6: {
+            // perform destroy
+            if (engine_cfg) {
+                err = mv_destroy_engine_config(engine_cfg);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err);
+                engine_cfg = NULL;
+            }
+
+            if (infer) {
+                err = mv_inference_destroy(infer);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy inference handle [err:%i]\n", err);
+                infer = NULL;
+            }
+
+            if (mvSource) {
+                err = mv_destroy_source(mvSource);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource [err:%i]\n", err);
+                mvSource = NULL;
+            }
+        } break;
+        default:
+            printf("Invalid option.\n");
+            sel_opt = 0;
+            continue;
+        }
+
+        int do_another = 0;
+        if (err != MEDIA_VISION_ERROR_NONE) {
+            printf("ERROR: Action is finished with error code:%i\n", err);
+        }
+
+        sel_opt = 0;
+        const int options_last[] = { 1, 2 };
+        const char *names_last[] = { "Yes", "No" };
+
+        while (sel_opt == 0) {
+            sel_opt = show_menu(
+                    "Run Facial Landmark Detection again?:", options_last,
+                    names_last, ARRAY_SIZE(options_last));
+            switch (sel_opt) {
+            case 1:
+                do_another = 1;
+                break;
+            case 2:
+                do_another = 0;
+                break;
+            default:
+                printf("Invalid option.\n");
+                sel_opt = 0;
+            }
+        }
+
+        sel_opt = (do_another == 1) ? 0 : 1;
+    }
+
+    if (engine_cfg) {
+        err = mv_destroy_engine_config(engine_cfg);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy engine_cfg [err:%i]\n", err);
+        engine_cfg = NULL;
+    }
+
+    if (infer) {
+        err = mv_inference_destroy(infer);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy inference handle [err:%i]\n", err);
+        infer = NULL;
+    }
+
+    if (mvSource) {
+        err = mv_destroy_source(mvSource);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy mvSource [err:%i]\n", err);
+        mvSource = NULL;
+    }
+
+    return MEDIA_VISION_ERROR_NONE;
+}
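+/*
+ * perform_armnn_cpm_config() below selects MV_INFERENCE_BACKEND_TFLITE
+ * despite its name: the Convolutional Pose Machine model is run through the
+ * TFLite backend here.
+ */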
+int perform_armnn_cpm_config(mv_engine_config_h *engine_cfg)
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    mv_engine_config_h handle = NULL;
+    err = mv_create_engine_config(&handle);
+    if (err != MEDIA_VISION_ERROR_NONE) {
+        printf("Fail to create engine configuration handle.\n");
+        if (handle) {
+            int err2 = mv_destroy_engine_config(handle);
+            if (err2 != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to destroy engine configuration.\n");
+            }
+        }
+        return err;
+    }
+
+    const char *inputNodeName = "image";
+    const char *outputNodeName[] = { "Convolutional_Pose_Machine/stage_5_out" };
+
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_WEIGHT_FILE_PATH, PLD_TFLITE_WEIGHT_PATH);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_DATA_TYPE, MV_INFERENCE_DATA_FLOAT32);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_MODEL_USER_FILE_PATH, PLD_POSE_LABEL_PATH);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_MEAN_VALUE, 0.0);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_MODEL_STD_VALUE, 1.0);
+    mv_engine_config_set_double_attribute(handle, MV_INFERENCE_CONFIDENCE_THRESHOLD, 0.3);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_BACKEND_TYPE, MV_INFERENCE_BACKEND_TFLITE);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_TARGET_TYPE, MV_INFERENCE_TARGET_CPU);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_WIDTH, 192);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_HEIGHT, 192);
+    mv_engine_config_set_int_attribute(handle, MV_INFERENCE_INPUT_TENSOR_CHANNELS, 3);
+    mv_engine_config_set_string_attribute(handle, MV_INFERENCE_INPUT_NODE_NAME, inputNodeName);
+    mv_engine_config_set_array_string_attribute(handle, MV_INFERENCE_OUTPUT_NODE_NAMES, outputNodeName, 1);
+
+    *engine_cfg = handle;
+    return err;
+}
+
+int perform_pose_landmark_detection()
+{
+    int err = MEDIA_VISION_ERROR_NONE;
+
+    int sel_opt = 0;
+    const int options[] = { 1, 2, 3, 4, 5 };
+    const char *names[] = { "Configuration",
+                "TFLITE(CPU) + CPM",
+                "Prepare",
+                "Run",
+                "Back" };
+
+    mv_engine_config_h engine_cfg = NULL;
+    mv_inference_h infer = NULL;
+    mv_source_h mvSource = NULL;
+
+    while (sel_opt == 0) {
+        sel_opt = show_menu("Select Action:", options, names,
+                ARRAY_SIZE(options));
+        switch (sel_opt) {
+        case 1: {
+            // perform configuration
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_configuration(&engine_cfg);
+        } break;
+        case 2: {
+            // perform CPM config
+            if (engine_cfg) {
+                int err2 = mv_destroy_engine_config(engine_cfg);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err2);
+                engine_cfg = NULL;
+            }
+            err = perform_armnn_cpm_config(&engine_cfg);
+        } break;
+        case 3: {
+            // create - configure - prepare
+            if (infer) {
+                int err2 = mv_inference_destroy(infer);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy inference handle [err:%i]\n", err2);
+                infer = NULL;
+            }
+
+            // create inference handle
+            err = mv_inference_create(&infer);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to create inference handle [err:%i]\n", err);
+                break;
+            }
+
+            // configure
+            err = mv_inference_configure(infer, engine_cfg);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to configure inference handle [err:%i]\n", err);
+                break;
+            }
+
+            // prepare
+            err = mv_inference_prepare(infer);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to prepare inference handle [err:%i]\n", err);
+                break;
+            }
+        } break;
+        case 4: {
+            if (mvSource) {
+                int err2 = mv_destroy_source(mvSource);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource.\n");
+                mvSource = NULL;
+            }
+
+            char *in_file_name = NULL;
+            /* Load media source */
+            while (input_string("Input file name to be inferred:", 1024,
+                    &(in_file_name)) == -1)
+                printf("Incorrect input! Try again.\n");
+
+            err = mv_create_source(&mvSource);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to create mvSource.\n");
+                free(in_file_name);
+                break;
+            }
+
+            err = load_mv_source_from_file(in_file_name, mvSource);
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                int err2 = mv_destroy_source(mvSource);
+                if (err2 != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource. error code:%i\n", err2);
+                mvSource = NULL;
+                free(in_file_name);
+                break;
+            }
+            free(in_file_name);
+
+            struct timespec s_tspec;
+            struct timespec e_tspec;
+
+            clock_gettime(CLOCK_MONOTONIC, &s_tspec);
+
+            // Pose Landmark Detect
+            err = mv_inference_pose_landmark_detect(
+                    mvSource, infer, NULL, _pose_landmark_detected_cb, NULL);
+
+            clock_gettime(CLOCK_MONOTONIC, &e_tspec);
+
+            struct timespec diffspec = diff(s_tspec, e_tspec);
+            unsigned long timeDiff = gettotalmillisec(diffspec);
+            printf("elapsed time : %lu(ms)\n", timeDiff);
+        } break;
+        case 5: {
+            // perform destroy
+            if (engine_cfg) {
+                err = mv_destroy_engine_config(engine_cfg);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy engine_cfg [err:%i]\n", err);
+                engine_cfg = NULL;
+            }
+
+            if (infer) {
+                err = mv_inference_destroy(infer);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy inference handle [err:%i]\n", err);
+                infer = NULL;
+            }
+
+            if (mvSource) {
+                err = mv_destroy_source(mvSource);
+                if (err != MEDIA_VISION_ERROR_NONE)
+                    printf("Fail to destroy mvSource [err:%i]\n", err);
+                mvSource = NULL;
+            }
+        } break;
+        default:
+            printf("Invalid option.\n");
+            sel_opt = 0;
+            continue;
+        }
+
+        int do_another = 0;
+        if (err != MEDIA_VISION_ERROR_NONE) {
+            printf("ERROR: Action is finished with error code: %i\n", err);
+        }
+
+        sel_opt = 0;
+        const int options_last[] = { 1, 2 };
+        const char *names_last[] = { "Yes", "No" };
+
+        while (sel_opt == 0) {
+            sel_opt = show_menu(
+                    "Run Pose Landmark Detection again?:", options_last,
+                    names_last, ARRAY_SIZE(options_last));
+            switch (sel_opt) {
+            case 1:
+                do_another = 1;
+                break;
+            case 2:
+                do_another = 0;
+                break;
+            default:
+                printf("Invalid option.\n");
+                sel_opt = 0;
+            }
+        }
+
+        sel_opt = (do_another == 1) ? 0 : 1;
+    }
+
+    if (engine_cfg) {
+        err = mv_destroy_engine_config(engine_cfg);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy engine_cfg [err:%i]\n", err);
+        engine_cfg = NULL;
+    }
+
+    if (infer) {
+        err = mv_inference_destroy(infer);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy inference handle [err:%i]\n", err);
+        infer = NULL;
+    }
+
+    if (mvSource) {
+        err = mv_destroy_source(mvSource);
+        if (err != MEDIA_VISION_ERROR_NONE)
+            printf("Fail to destroy mvSource [err:%i]\n", err);
+        mvSource = NULL;
+    }
+
+    return MEDIA_VISION_ERROR_NONE;
+}
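+/*
+ * main() just dispatches to the per-task menus above; each perform_*()
+ * routine owns and releases its engine-config, inference and source handles
+ * before returning.
+ */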
+int main()
+{
+    int sel_opt = 0;
+
+    const int options[] = { 1, 2, 3, 4, 5, 6 };
+    const char *names[] = { "Image Classification", "Object Detection",
+                "Face Detection", "Facial Landmark Detection",
+                "Pose Landmark Detection", "Exit" };
+
+    int err = MEDIA_VISION_ERROR_NONE;
+    while (sel_opt == 0) {
+        sel_opt = show_menu("Select Action:", options, names,
+                ARRAY_SIZE(options));
+        switch (sel_opt) {
+        case 1: {
+            err = perform_image_classification();
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to perform image classification. ERROR[0x%x]\n", err);
+            }
+        } break;
+        case 2: {
+            err = perform_object_detection();
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to perform object detection. ERROR[0x%x]\n", err);
+            }
+        } break;
+        case 3: {
+            err = perform_face_detection();
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to perform face detection. ERROR[0x%x]\n", err);
+            }
+        } break;
+        case 4: {
+            err = perform_facial_landmark_detection();
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to perform facial landmark detection. ERROR[0x%x]\n", err);
+            }
+        } break;
+        case 5: {
+            err = perform_pose_landmark_detection();
+            if (err != MEDIA_VISION_ERROR_NONE) {
+                printf("Fail to perform pose landmark detection. ERROR[0x%x]\n", err);
+            }
+        } break;
+        case 6: {
+            printf("Exit.\n");
+        } break;
+        default:
+            printf("Invalid option.\n");
+            sel_opt = 0;
+            continue;
+        }
+
+        int do_another = 0;
+
+        sel_opt = 0;
+        const int options_last[] = { 1, 2 };
+        const char *names_last[] = { "Yes", "No" };
+
+        while (sel_opt == 0) {
+            sel_opt = show_menu("Another action?: ", options_last, names_last,
+                    ARRAY_SIZE(options_last));
+            switch (sel_opt) {
+            case 1:
+                do_another = 1;
+                break;
+            case 2:
+                do_another = 0;
+                break;
+            default:
+                printf("Invalid option.\n");
+                sel_opt = 0;
+            }
+        }
+
+        sel_opt = (do_another == 1) ? 0 : 1;
+    }
+
+    return 0;
+}
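+/*
+ * show_menu(), input_string(), load_mv_source_from_file(), ARRAY_SIZE and
+ * the *_PATH / _*_cb symbols used throughout are defined in the earlier part
+ * of this test-suite file.
+ */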