Refactor the InputMetadata and OutputMetadata classes.
The biggest change in this patch is that all requests to the
various metadata-related classes are now delegated through the
InputMetadata and OutputMetadata classes, which reduces the
Inference class's dependency on metadata internals.
In addition, it moves several classes that were declared inside the BoxInfo
class out of it, and changes class to struct where class functionality isn't
needed. It also includes some cleanup and code movement.
This is just one step toward further code refactoring.
Change-Id: I92c4e2b2c77499246c56a237282c05262550301a
Signed-off-by: Inki Dae <inki.dae@samsung.com>
class InputMetadata
{
public:
- bool parsed;
- std::map<std::string, LayerInfo> layer;
- std::map<std::string, Options> option;
-
/**
* @brief Creates an InputMetadata class instance.
*
* @since_tizen 6.5
*/
int Parse(JsonObject *root);
+ /** @brief Returns the value of the private `parsed` flag. */
+ bool IsParsed(void) const { return parsed; }
+ /** @brief Mutable access to the LayerInfo map (keyed by std::string). */
+ std::map<std::string, LayerInfo>& GetLayer() { return layer; }
+ /** @brief Read-only overload so a const InputMetadata can be queried. */
+ const std::map<std::string, LayerInfo>& GetLayer() const { return layer; }
+ /** @brief Mutable access to the Options map (keyed by std::string). */
+ std::map<std::string, Options>& GetOption() { return option; }
+ /** @brief Read-only overload so a const InputMetadata can be queried. */
+ const std::map<std::string, Options>& GetOption() const { return option; }
private:
+ bool parsed;
std::map<std::string, inference_tensor_shape_type_e> mSupportedShapeType;
std::map<std::string, mv_inference_data_type_e> mSupportedDataType;
std::map<std::string, mv_colorspace_e> mSupportedColorSpace;
+ std::map<std::string, LayerInfo> layer;
+ std::map<std::string, Options> option;
template <typename T>
static T GetSupportedType(JsonObject* root, std::string typeName,
OutputMetadata mMeta;
int mBoxOffset;
int mNumberOfOjects;
-
- ScoreInfo& mScoreInfo;
- BoxInfo& mBoxInfo;
-
float mScaleW;
float mScaleH;
-
Boxes mResultBoxes;
float decodeScore(int idx);
int boxOffset, float scaleW, float scaleH, int numberOfObjects = 0) :
mTensorBuffer(buffer), mMeta(metaData),
mBoxOffset(boxOffset), mNumberOfOjects(numberOfObjects),
- mScoreInfo(mMeta.GetScore()), mBoxInfo(mMeta.GetBox()),
- mScaleW(scaleW), mScaleH(scaleH),
- mResultBoxes() {
+ mScaleW(scaleW), mScaleH(scaleH), mResultBoxes() {
};
~ObjectDecoder() = default;
{
private:
std::vector<int> dims;
+
public:
std::vector<int> GetValidIndexAll() const;
void SetValidIndex(int index);
private:
double scale;
double zeropoint;
+
public:
DeQuantization(double s, double z) : scale(s), zeropoint(z) {};
~DeQuantization() = default;
std::string name;
DimInfo dimInfo;
double threshold;
- inference_score_type_e type;
int topNumber;
+ inference_score_type_e type;
std::shared_ptr<DeQuantization> deQuantization;
-
std::map<std::string, inference_score_type_e> supportedScoreTypes;
public:
inference_score_type_e GetType() { return type; }
int GetTopNumber() { return topNumber; }
std::shared_ptr<DeQuantization> GetDeQuant() { return deQuantization; }
-
int ParseScore(JsonObject *root);
};
- class BoxInfo
- {
+ /**
+  * Anchor parameters stored in DecodeInfo::anchorParam.
+  *
+  * Every member carries a default member initializer because the DecodeInfo
+  * constructor does not assign anchorParam; without these, reads before any
+  * explicit assignment would see indeterminate values.
+  * NOTE(review): the zero/false defaults only make the state deterministic;
+  * the real values are presumably filled in by ParseAnchorParam() - confirm.
+  */
+ struct AnchorParam {
+ int mode = 0; /**< 0: generate anchor, 1:load pre-anchor*/
+ int numLayers = 0;
+ float minScale = 0.f;
+ float maxScale = 0.f;
+ int inputSizeHeight = 0;
+ int inputSizeWidth = 0;
+ float anchorOffsetX = 0.f;
+ float anchorOffsetY = 0.f;
+ std::vector<int> strides;
+ std::vector<float> aspectRatios;
+ bool isReduceBoxedInLowestLayer = false;
+ float interpolatedScaleAspectRatio = 0.f;
+ bool isFixedAnchorSize = false;
+ bool isExponentialBoxScale = false;
+ float xScale = 0.f;
+ float yScale = 0.f;
+ float wScale = 0.f;
+ float hScale = 0.f;
+ };
+
+ /**
+  * NMS parameters stored in DecodeInfo::nmsParam.
+  *
+  * The default member initializers carry the same values the DecodeInfo
+  * constructor assigns (mode NONE, IoU threshold 0.2, "STANDARD" registered),
+  * so an NMSParam created on its own starts in the same state.
+  */
+ struct NMSParam {
+ inference_box_nms_type_e mode = INFERENCE_BOX_NMS_TYPE_NONE; /**< 0: standard */
+ float iouThreshold = 0.2f;
+ std::map<std::string, inference_box_nms_type_e> supportedBoxNmsTypes = {
+ { "STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD }
+ };
+ };
+
+ /**
+  * Rotation parameters stored in DecodeInfo::rotParam.
+  *
+  * The default member initializers carry the same values the DecodeInfo
+  * constructor assigns (indexes -1, points at the origin, base angle 0), so a
+  * RotateParam created on its own starts in the same state.
+  */
+ struct RotateParam {
+ int startPointIndex = -1;
+ int endPointIndex = -1;
+ cv::Point2f startPoint = cv::Point2f(0.f, 0.f);
+ cv::Point2f endPoint = cv::Point2f(0.f, 0.f);
+ float baseAngle = 0.f;
+ };
+
+ /**
+  * ROI option parameters stored in DecodeInfo::roiOptParam.
+  *
+  * The default member initializers carry the same values the DecodeInfo
+  * constructor assigns (indexes and mode -1, zero shift, unit scale), so a
+  * RoiOptionParam created on its own starts in the same state.
+  */
+ struct RoiOptionParam {
+ int startPointIndex = -1;
+ int endPointIndex = -1;
+ int centerPointIndex = -1;
+ cv::Point2f centerPoint = cv::Point2f(0.f, 0.f);
+ float shiftX = 0.f;
+ float shiftY = 0.f;
+ float scaleX = 1.f;
+ float scaleY = 1.f;
+ int mode = -1;
+ };
+
+ class DecodeInfo {
+ private:
+ AnchorParam anchorParam;
+ std::vector<cv::Rect2f> anchorBoxes;
+ NMSParam nmsParam;
+ RotateParam rotParam;
+ RoiOptionParam roiOptParam;
+
public:
- class DecodeInfo {
- public:
- class AnchorParam {
- public:
- int mode; /**< 0: generate anchor, 1:load pre-anchor*/
- int numLayers;
- float minScale;
- float maxScale;
- int inputSizeHeight;
- int inputSizeWidth;
- float anchorOffsetX;
- float anchorOffsetY;
- std::vector<int> strides;
- std::vector<float> aspectRatios;
- bool isReduceBoxedInLowestLayer;
- float interpolatedScaleAspectRatio;
- bool isFixedAnchorSize;
- bool isExponentialBoxScale;
- float xScale;
- float yScale;
- float wScale;
- float hScale;
-
- AnchorParam() = default;
- ~AnchorParam() = default;
- };
-
- class NMSParam {
- public:
- inference_box_nms_type_e mode; /**< 0: standard */
- float iouThreshold;
-
- std::map<std::string, inference_box_nms_type_e> supportedBoxNmsTypes;
-
- NMSParam() : mode(INFERENCE_BOX_NMS_TYPE_NONE), iouThreshold(0.2f) {
- supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD});
- };
-
- ~NMSParam() = default;
- };
-
- class RotateParam {
- public:
- int startPointIndex;
- int endPointIndex;
- cv::Point2f startPoint;
- cv::Point2f endPoint;
- float baseAngle;
-
- RotateParam() : startPointIndex(-1),
- endPointIndex(-1),
- startPoint(cv::Point2f(0.f,0.f)),
- endPoint(cv::Point2f(0.f,0.f)),
- baseAngle(0.f){};
- ~RotateParam() = default;
- };
-
- class RoiOptionParam {
- public:
- int startPointIndex;
- int endPointIndex;
- int centerPointIndex;
- cv::Point2f centerPoint;
- float shiftX;
- float shiftY;
- float scaleX;
- float scaleY;
- int mode;
-
- RoiOptionParam() : startPointIndex(-1),
- endPointIndex(-1),
- centerPointIndex(-1),
- centerPoint(cv::Point2f(0.f, 0.f)),
- shiftX(0.f), shiftY(0.f),
- scaleX(1.f), scaleY(1.f),
- mode(-1) {};
- ~RoiOptionParam() = default;
- };
-
- private:
- AnchorParam anchorParam;
- std::vector<cv::Rect2f> anchorBoxes;
- NMSParam nmsParam;
- RotateParam rotParam;
- RoiOptionParam roiOptParam;
-
- public:
- DecodeInfo() = default;
- ~DecodeInfo() = default;
- std::vector<cv::Rect2f>& GetAnchorBoxAll();
- bool IsAnchorBoxEmpty();
- void AddAnchorBox(cv::Rect2f& ahcnor);
- void ClearAnchorBox();
-
- // Anchor param
- int ParseAnchorParam(JsonObject *root);
- int GenerateAnchor();
- bool IsFixedAnchorSize();
- bool IsExponentialBoxScale();
- float GetAnchorXscale();
- float GetAnchorYscale();
- float GetAnchorWscale();
- float GetAnchorHscale();
- float CalculateScale(float min, float max, int index, int maxStride);
-
- // Nms param
- int ParseNms(JsonObject *root);
- int GetNmsMode();
- float GetNmsIouThreshold();
-
- // Rotate param
- int ParseRotate(JsonObject *root);
- int GetRotStartPointIndex();
- int GetRotEndPointIndex();
- float GetBaseAngle();
-
- // Roi option param
- int ParseRoiOption(JsonObject *root);
- int GetRoiMode();
- int GetRoiCenterPointIndex();
- int GetRoiStartPointIndex();
- int GetRoiEndPointIndex();
- float GetShiftX();
- float GetShiftY();
- float GetScaleX();
- float GetScaleY();
- };
+ DecodeInfo() { // assigns defaults to nmsParam, rotParam and roiOptParam; anchorParam is not assigned here
+ nmsParam.mode = INFERENCE_BOX_NMS_TYPE_NONE; // same values the removed nested NMSParam constructor set
+ nmsParam.iouThreshold = 0.2f;
+ nmsParam.supportedBoxNmsTypes.insert({"STANDARD", INFERENCE_BOX_NMS_TYPE_STANDARD}); // the only NMS type name registered here
+
+ rotParam.startPointIndex = -1; // same values the removed nested RotateParam constructor set
+ rotParam.endPointIndex = -1;
+ rotParam.startPoint = cv::Point2f(0.f,0.f);
+ rotParam.endPoint = cv::Point2f(0.f,0.f);
+ rotParam.baseAngle = 0.f;
+
+ roiOptParam.startPointIndex = -1; // same values the removed nested RoiOptionParam constructor set
+ roiOptParam.endPointIndex = -1;
+ roiOptParam.centerPointIndex = -1;
+ roiOptParam.centerPoint = cv::Point2f(0.f, 0.f);
+ roiOptParam.shiftX = 0.f;
+ roiOptParam.shiftY = 0.f;
+ roiOptParam.scaleX = 1.f; // scale defaults to 1, unlike the zero shift
+ roiOptParam.scaleY = 1.f;
+ roiOptParam.mode = -1;
+ }
+
+ ~DecodeInfo() = default;
+
+ std::vector<cv::Rect2f>& GetAnchorBoxAll();
+ bool IsAnchorBoxEmpty();
+ void AddAnchorBox(cv::Rect2f& ahcnor);
+ void ClearAnchorBox();
+
+ // Anchor param
+ int ParseAnchorParam(JsonObject *root);
+ int GenerateAnchor();
+ bool IsFixedAnchorSize();
+ bool IsExponentialBoxScale();
+ float GetAnchorXscale();
+ float GetAnchorYscale();
+ float GetAnchorWscale();
+ float GetAnchorHscale();
+ float CalculateScale(float min, float max, int index, int maxStride);
+
+ // Nms param
+ int ParseNms(JsonObject *root);
+ int GetNmsMode();
+ float GetNmsIouThreshold();
+
+ // Rotate param
+ int ParseRotate(JsonObject *root);
+ int GetRotStartPointIndex();
+ int GetRotEndPointIndex();
+ float GetBaseAngle();
+
+ // Roi option param
+ int ParseRoiOption(JsonObject *root);
+ int GetRoiMode();
+ int GetRoiCenterPointIndex();
+ int GetRoiStartPointIndex();
+ int GetRoiEndPointIndex();
+ float GetShiftX();
+ float GetShiftY();
+ float GetScaleX();
+ float GetScaleY();
+ };
+ class BoxInfo
+ {
private:
std::string name;
DimInfo dimInfo;
int ParseNumber(JsonObject *root);
};
+ /**
+  * Heat-map description held by Landmark (member heatMapInfo).
+  *
+  * wIdx / hIdx / cIdx are used by the Inference code as indexes into an
+  * output layer's shape array, so they are given deterministic defaults
+  * instead of being left indeterminate.
+  * NOTE(review): the real values are presumably written by
+  * Landmark::ParseLandmark() - confirm; the zero defaults are placeholders.
+  */
+ struct HeatMapInfo {
+ int wIdx = 0;
+ int hIdx = 0;
+ int cIdx = 0;
+ float nmsRadius = 0.f;
+ inference_tensor_shape_type_e shapeType {}; /**< value-initialized (enumerator with value 0) */
+ };
+
class Landmark
{
- public:
- class DecodeInfo {
- public:
- class HeatMapInfo {
- public:
- int wIdx;
- int hIdx;
- int cIdx;
- inference_tensor_shape_type_e shapeType;
- float nmsRadius;
- HeatMapInfo() = default;
- ~HeatMapInfo() = default;
- };
- HeatMapInfo heatMap;
- DecodeInfo() = default;
- ~DecodeInfo() = default;
- };
private:
std::string name;
DimInfo dimInfo;
inference_landmark_decoding_type_e decodingType; /**< 0: decoding unnecessary,
1: decoding heatmap,
2: decoding heatmap with refinement */
- DecodeInfo decodingInfo;
+ HeatMapInfo heatMapInfo;
std::map<std::string, inference_landmark_type_e> supportedLandmarkTypes;
std::map<std::string, inference_landmark_coorindate_type_e> supportedLandmarkCoordinateTypes;
int GetOffset();
inference_landmark_coorindate_type_e GetCoordinate();
inference_landmark_decoding_type_e GetDecodingType();
- DecodeInfo& GetDecodingInfo();
+ HeatMapInfo& GetHeatMapInfo();
int ParseLandmark(JsonObject *root);
};
Edge() = default;
~Edge() = default;
int ParseEdge(JsonObject *root);
- std::vector<std::pair<int, int>>& GetEdgesAll();
+ std::vector<std::pair<int, int>>& GetEdgesAll() { return edges; }
};
class OutputMetadata
*/
int Parse(JsonObject *root);
- bool IsParsed();
- ScoreInfo& GetScore();
- BoxInfo& GetBox();
- Label& GetLabel();
- Number& GetNumber();
- Landmark& GetLandmark();
- OffsetVec& GetOffset();
- std::vector<DispVec>& GetDispVecAll();
- Edge& GetEdge();
+ bool IsParsed() { return parsed; }
+
+ std::string GetScoreName() { return score.GetName(); }
+ DimInfo GetScoreDimInfo() { return score.GetDimInfo(); }
+ inference_score_type_e GetScoreType() { return score.GetType(); }
+ double GetScoreThreshold() { return score.GetThresHold(); }
+ int GetScoreTopNumber() { return score.GetTopNumber(); }
+ std::shared_ptr<DeQuantization> GetScoreDeQuant() { return score.GetDeQuant(); }
+ std::string GetBoxName() { return box.GetName(); }
+ DimInfo GetBoxDimInfo() { return box.GetDimInfo(); }
+ std::vector<int> GetBoxOrder() { return box.GetOrder(); }
+ DecodeInfo& GetBoxDecodeInfo() { return box.GetDecodeInfo(); }
+ inference_box_type_e GetBoxType() { return box.GetType(); }
+ /** @brief Returns box.GetCoordinate(); callers compare it with INFERENCE_BOX_COORDINATE_TYPE_* values. */
+ int GetBoxCoordinate() { return box.GetCoordinate(); }
+ /** @brief Misnamed alias of GetBoxCoordinate() (it reads the box, not the score); kept so existing callers still compile. */
+ int GetScoreCoordinate() { return GetBoxCoordinate(); }
+ std::string GetLabelName() { return label.GetName(); }
+ std::string GetNumberName() { return number.GetName(); }
+ DimInfo GetNumberDimInfo() { return number.GetDimInfo(); }
+ std::string GetLandmarkName() { return landmark.GetName(); }
+ int GetLandmarkOffset() { return landmark.GetOffset(); }
+ inference_landmark_type_e GetLandmarkType() { return landmark.GetType(); }
+ DimInfo GetLandmarkDimInfo() { return landmark.GetDimInfo(); }
+ HeatMapInfo& GetLandmarkHeatMapInfo() { return landmark.GetHeatMapInfo(); }
+ inference_landmark_coorindate_type_e GetLandmarkCoordinate() { return landmark.GetCoordinate(); }
+ inference_landmark_decoding_type_e GetLandmarkDecodingType() { return landmark.GetDecodingType(); }
+ std::string GetOffsetVecName() { return offsetVec.GetName(); }
+ inference_box_decoding_type_e GetBoxDecodingType() { return box.GetDecodingType(); }
+ std::vector<DispVec>& GetDispVecAll() { return dispVecs; }
+ std::vector<std::pair<int, int>>& GetEdges() { return edgeMap.GetEdgesAll(); }
template <typename T>
static T GetSupportedType(JsonObject* root, std::string typeName,
std::map<std::string, T>& supportedTypes);
};
-
} /* Inference */
} /* MediaVision */
mConfig.mDataType = static_cast<mv_inference_data_type_e>(dataType);
mConfig.mInputLayerNames = names;
- const InputMetadata& inputMeta = mMetadata.GetInputMeta();
- if (inputMeta.parsed) {
+ if (mMetadata.GetInputMeta().IsParsed()) {
LOGI("use input meta");
- auto& layerInfo = inputMeta.layer.begin()->second;
+ auto& layerInfo = mMetadata.GetInputMeta().GetLayer().begin()->second;
if (layerInfo.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) { // NCHW
mConfig.mTensorInfo.ch = layerInfo.dims[1];
mConfig.mTensorInfo.dim = layerInfo.dims[0];
LOGE("Invalid shape type[%d]", layerInfo.shapeType);
}
- if (!inputMeta.option.empty()) {
- auto& option = inputMeta.option.begin()->second;
+ if (!mMetadata.GetInputMeta().GetOption().empty()) {
+ auto& option = mMetadata.GetInputMeta().GetOption().begin()->second;
if (option.normalization.use) {
mConfig.mMeanValue = option.normalization.mean[0];
mConfig.mStdValue = option.normalization.std[0];
mConfig.mDataType = layerInfo.dataType;
mConfig.mInputLayerNames.clear();
- for (auto& layer : inputMeta.layer) {
+ for (auto& layer : mMetadata.GetInputMeta().GetLayer()) {
mConfig.mInputLayerNames.push_back(layer.first);
}
}
mConfig.mOutputLayerNames = names;
OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
- if (outputMeta.IsParsed()) {
+ if (mMetadata.GetOutputMeta().IsParsed()) {
mConfig.mOutputLayerNames.clear();
- if (!outputMeta.GetScore().GetName().empty())
- mConfig.mOutputLayerNames.push_back(outputMeta.GetScore().GetName());
+ if (!outputMeta.GetScoreName().empty())
+ mConfig.mOutputLayerNames.push_back(outputMeta.GetScoreName());
- if (!outputMeta.GetBox().GetName().empty())
- mConfig.mOutputLayerNames.push_back(outputMeta.GetBox().GetName());
+ if (!outputMeta.GetBoxName().empty())
+ mConfig.mOutputLayerNames.push_back(outputMeta.GetBoxName());
- if (!outputMeta.GetLabel().GetName().empty())
- mConfig.mOutputLayerNames.push_back(outputMeta.GetLabel().GetName());
+ if (!outputMeta.GetLabelName().empty())
+ mConfig.mOutputLayerNames.push_back(outputMeta.GetLabelName());
- if (!outputMeta.GetNumber().GetName().empty())
- mConfig.mOutputLayerNames.push_back(outputMeta.GetNumber().GetName());
+ if (!outputMeta.GetNumberName().empty())
+ mConfig.mOutputLayerNames.push_back(outputMeta.GetNumberName());
- if (!outputMeta.GetLandmark().GetName().empty())
- mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmark().GetName());
+ if (!outputMeta.GetLandmarkName().empty())
+ mConfig.mOutputLayerNames.push_back(outputMeta.GetLandmarkName());
- if (!outputMeta.GetOffset().GetName().empty())
- mConfig.mOutputLayerNames.push_back(outputMeta.GetOffset().GetName());
+ if (!outputMeta.GetOffsetVecName().empty())
+ mConfig.mOutputLayerNames.push_back(outputMeta.GetOffsetVecName());
for (auto& dispVec : outputMeta.GetDispVecAll()) {
mConfig.mOutputLayerNames.push_back(dispVec.GetName());
return MEDIA_VISION_ERROR_INVALID_PARAMETER;
}
- const InputMetadata& inputMeta = mMetadata.GetInputMeta();
- if (inputMeta.parsed) {
+ if (mMetadata.GetInputMeta().IsParsed()) {
for (auto& buffer : mInputTensorBuffers.getAllTensorBuffer()) {
inference_engine_tensor_buffer& tensor_buffer = buffer.second;
- const LayerInfo& layerInfo = inputMeta.layer.at(buffer.first);
- const Options& opt = inputMeta.option.empty() ? Options() : inputMeta.option.at(buffer.first);
+ const LayerInfo& layerInfo = mMetadata.GetInputMeta().GetLayer().at(buffer.first);
+ const Options& opt = mMetadata.GetInputMeta().GetOption().empty() ? Options() : mMetadata.GetInputMeta().GetOption().at(buffer.first);
int data_type = ConvertToCv(tensor_buffer.data_type);
int Inference::GetClassficationResults(ImageClassificationResults &results)
{
- OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
// Will contain top N results in ascending order.
std::vector<std::pair<float, int>> topScore;
auto threadHold = mConfig.mConfidenceThresHold;
results.number_of_classes = 0;
- if (outputMeta.IsParsed()) {
- auto& info = outputMeta.GetScore();
- std::vector<int> indexes = info.GetDimInfo().GetValidIndexAll();
+ if (mMetadata.GetOutputMeta().IsParsed()) {
+ OutputMetadata outputMetadata = mMetadata.GetOutputMeta();
+ std::vector<int> indexes = outputMetadata.GetScoreDimInfo().GetValidIndexAll();
if (indexes.size() != 1) {
LOGE("Invalid dim size. It should be 1");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- int classes = mOutputLayerProperty.layers[info.GetName()].shape[indexes[0]];
+ int classes = mOutputLayerProperty.layers[outputMetadata.GetScoreName()].shape[indexes[0]];
- if (!mOutputTensorBuffers.exist(info.GetName())) {
+ if (!mOutputTensorBuffers.exist(outputMetadata.GetScoreName())) {
LOGE("output buffe is NULL");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
PostProcess postProc;
- postProc.ScoreClear(info.GetTopNumber());
- threadHold = info.GetThresHold();
+ postProc.ScoreClear(outputMetadata.GetScoreTopNumber());
+ threadHold = outputMetadata.GetScoreThreshold();
for (int cId = 0; cId < classes; ++cId) {
float value = 0.0f;
try {
- value = mOutputTensorBuffers.getValue<float>(info.GetName(), cId);
+ value = mOutputTensorBuffers.getValue<float>(outputMetadata.GetScoreName(), cId);
} catch (const std::exception& e) {
LOGE(" Fail to get getValue with %s", e.what());
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- if (info.GetDeQuant()) {
+ if (outputMetadata.GetScoreDeQuant()) {
value = PostProcess::dequant(value,
- info.GetDeQuant()->GetScale(),
- info.GetDeQuant()->GetZeroPoint());
+ outputMetadata.GetScoreDeQuant()->GetScale(),
+ outputMetadata.GetScoreDeQuant()->GetZeroPoint());
}
- if (info.GetType() == INFERENCE_SCORE_TYPE_SIGMOID)
+ if (outputMetadata.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID)
value = PostProcess::sigmoid(value);
if (value < threadHold)
int Inference::GetObjectDetectionResults(
ObjectDetectionResults *detectionResults)
{
- OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
- if (outputMeta.IsParsed()) {
+ if (mMetadata.GetOutputMeta().IsParsed()) {
+ OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
+
// decoding type
- auto& boxInfo = outputMeta.GetBox();
- auto& scoreInfo = outputMeta.GetScore();
- if (!mOutputTensorBuffers.exist(boxInfo.GetName()) ||
- !mOutputTensorBuffers.exist(scoreInfo.GetName()) ){
+ if (!mOutputTensorBuffers.exist(outputMeta.GetBoxName()) ||
+ !mOutputTensorBuffers.exist(outputMeta.GetScoreName()) ){
LOGE("output buffers named of %s or %s are NULL",
- boxInfo.GetName().c_str(), scoreInfo.GetName().c_str());
+ outputMeta.GetBoxName().c_str(), outputMeta.GetScoreName().c_str());
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
int boxOffset = 0;
int numberOfObjects = 0;
- if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
- std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
+ if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
+ std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
if (boxIndexes.size() != 1) {
LOGE("Invalid dim size. It should be 1");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]];
+ boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
} else {
- std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
+ std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
if (boxIndexes.size() != 1) {
LOGE("Invalid dim size. It should be 1");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]];
+ boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
- std::vector<int> scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll();
+ std::vector<int> scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll();
if (scoreIndexes.size() != 1) {
LOGE("Invalid dim size. It should be 1");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- numberOfObjects = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]];
+ numberOfObjects = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]];
}
ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset,
- static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()),
- static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()),
+ static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()),
+ static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()),
numberOfObjects);
objDecoder.init();
int
Inference::GetFaceDetectionResults(FaceDetectionResults *detectionResults)
{
- OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
- if (outputMeta.IsParsed()) {
+ if (mMetadata.GetOutputMeta().IsParsed()) {
+ OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
+
// decoding type
- auto& boxInfo = outputMeta.GetBox();
- auto& scoreInfo = outputMeta.GetScore();
- if (!mOutputTensorBuffers.exist(boxInfo.GetName()) ||
- !mOutputTensorBuffers.exist(scoreInfo.GetName())){
+ if (!mOutputTensorBuffers.exist(outputMeta.GetBoxName()) ||
+ !mOutputTensorBuffers.exist(outputMeta.GetScoreName())){
LOGE("output buffers named of %s or %s are NULL",
- boxInfo.GetName().c_str(), scoreInfo.GetName().c_str());
+ outputMeta.GetBoxName().c_str(), outputMeta.GetScoreName().c_str());
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
int boxOffset = 0;
int numberOfFaces = 0;
- if (boxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
- std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
+ if (outputMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
+ std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
if (boxIndexes.size() != 1) {
LOGE("Invalid dim size. It should be 1");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]];
+ boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
} else {
- std::vector<int> boxIndexes = boxInfo.GetDimInfo().GetValidIndexAll();
+ std::vector<int> boxIndexes = outputMeta.GetBoxDimInfo().GetValidIndexAll();
if (boxIndexes.size() != 1) {
LOGE("Invalid dim size. It should be 1");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- boxOffset = mOutputLayerProperty.layers[boxInfo.GetName()].shape[boxIndexes[0]];
+ boxOffset = mOutputLayerProperty.layers[outputMeta.GetBoxName()].shape[boxIndexes[0]];
- std::vector<int> scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll();
+ std::vector<int> scoreIndexes = outputMeta.GetScoreDimInfo().GetValidIndexAll();
if (scoreIndexes.size() != 1) {
LOGE("Invaid dim size. It should be 1");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- numberOfFaces = mOutputLayerProperty.layers[scoreInfo.GetName()].shape[scoreIndexes[0]];
+ numberOfFaces = mOutputLayerProperty.layers[outputMeta.GetScoreName()].shape[scoreIndexes[0]];
}
ObjectDecoder objDecoder(mOutputTensorBuffers, outputMeta, boxOffset,
- static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth()),
- static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight()),
+ static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth()),
+ static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight()),
numberOfFaces);
objDecoder.init();
{
LOGI("ENTER");
- OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
- if (outputMeta.IsParsed()) {
- auto& landmarkInfo = outputMeta.GetLandmark();
- auto& scoreInfo = outputMeta.GetScore();
+ if (mMetadata.GetOutputMeta().IsParsed()) {
+ OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
- if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) ||
- !mOutputTensorBuffers.exist(scoreInfo.GetName())) {
+ if (!mOutputTensorBuffers.exist(outputMeta.GetLandmarkName()) ||
+ !mOutputTensorBuffers.exist(outputMeta.GetScoreName())) {
LOGE("output buffers named of %s or %s are NULL",
- landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str());
+ outputMeta.GetLandmarkName().c_str(), outputMeta.GetScoreName().c_str());
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
int heatMapWidth = 0;
int heatMapHeight = 0;
int heatMapChannel = 0;
- std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
+ std::vector<int> channelIndexes = outputMeta.GetLandmarkDimInfo().GetValidIndexAll();
int number_of_landmarks = heatMapChannel;
- if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+ if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
LOGI("landmark dim size: %zd and idx[0] is %d", channelIndexes.size(), channelIndexes[0]);
- number_of_landmarks = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]]
- / landmarkInfo.GetOffset();
+ number_of_landmarks = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]]
+ / outputMeta.GetLandmarkOffset();
} else {
- heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
- heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
- heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
+ heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx];
+ heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx];
+ heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx];
}
LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel);
float inputW = 1.f;
float inputH = 1.f;
- if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
- inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth());
- inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight());
+ if (outputMeta.GetLandmarkCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
+ inputW = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth());
+ inputH = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight());
}
- float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
- outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
+ float thresRadius = outputMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
+ outputMeta.GetLandmarkHeatMapInfo().nmsRadius;
poseDecoder.decode(inputW, inputH, thresRadius);
{
LOGI("ENTER");
- OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
auto poseResult = std::make_unique<mv_inference_pose_s>();
- if (outputMeta.IsParsed()) {
- auto& landmarkInfo = outputMeta.GetLandmark();
- auto& scoreInfo = outputMeta.GetScore();
+ if (mMetadata.GetOutputMeta().IsParsed()) {
+ OutputMetadata& outputMeta = mMetadata.GetOutputMeta();
- if (!mOutputTensorBuffers.exist(landmarkInfo.GetName()) ||
- !mOutputTensorBuffers.exist(scoreInfo.GetName())) {
+ if (!mOutputTensorBuffers.exist(outputMeta.GetLandmarkName()) ||
+ !mOutputTensorBuffers.exist(outputMeta.GetScoreName())) {
LOGE("output buffers named of %s or %s are NULL",
- landmarkInfo.GetName().c_str(), scoreInfo.GetName().c_str());
+ outputMeta.GetLandmarkName().c_str(), outputMeta.GetScoreName().c_str());
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
int heatMapHeight = 0;
int heatMapChannel = 0;
- if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
- heatMapWidth = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.wIdx];
- heatMapHeight = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.hIdx];
- heatMapChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[landmarkInfo.GetDecodingInfo().heatMap.cIdx];
+ if (outputMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+ heatMapWidth = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().wIdx];
+ heatMapHeight = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().hIdx];
+ heatMapChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[outputMeta.GetLandmarkHeatMapInfo().cIdx];
}
LOGI("heatMap: w[%d], h[%d], c[%d]", heatMapWidth, heatMapHeight, heatMapChannel);
- std::vector<int> channelIndexes = landmarkInfo.GetDimInfo().GetValidIndexAll();
+ std::vector<int> channelIndexes = outputMeta.GetLandmarkDimInfo().GetValidIndexAll();
// If INFERENCE_LANDMARK_DECODING_TYPE_BYPASS,
// the landmarkChannel is guessed from the shape of the landmark output tensor.
// Otherwise, it is guessed from the heatMapChannel. (heatMapChannel is used in default).
int landmarkChannel = heatMapChannel;
- if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS)
- landmarkChannel = mOutputLayerProperty.layers[landmarkInfo.GetName()].shape[channelIndexes[0]] / landmarkInfo.GetOffset();
+ if (outputMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS)
+ landmarkChannel = mOutputLayerProperty.layers[outputMeta.GetLandmarkName()].shape[channelIndexes[0]] / outputMeta.GetLandmarkOffset();
poseResult->number_of_landmarks_per_pose = mUserListName.empty() ? landmarkChannel :
static_cast<int>(mUserListName.size());
float inputW = 1.f;
float inputH = 1.f;
- float thresRadius = landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
- outputMeta.GetLandmark().GetDecodingInfo().heatMap.nmsRadius;
- if (landmarkInfo.GetCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
- inputW = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetWidth());
- inputH = static_cast<float>(mMetadata.GetInputMeta().layer.begin()->second.GetHeight());
+ float thresRadius = outputMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ? 0.0 :
+ outputMeta.GetLandmarkHeatMapInfo().nmsRadius;
+ if (outputMeta.GetLandmarkCoordinate() == INFERENCE_LANDMARK_COORDINATE_TYPE_PIXEL) {
+ inputW = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetWidth());
+ inputH = static_cast<float>(mMetadata.GetInputMeta().GetLayer().begin()->second.GetHeight());
}
poseDecoder.decode(inputW, inputH, thresRadius);
{
int ObjectDecoder::init()
{
- if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
- if (!mTensorBuffer.exist(mMeta.GetLabel().GetName()) ||
- !mTensorBuffer.exist(mMeta.GetNumber().GetName()) ) {
+ if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
+ if (!mTensorBuffer.exist(mMeta.GetLabelName()) ||
+ !mTensorBuffer.exist(mMeta.GetNumberName()) ) {
LOGE("buffer buffers named of %s or %s are NULL",
- mMeta.GetLabel().GetName().c_str(),
- mMeta.GetNumber().GetName().c_str());
+ mMeta.GetLabelName().c_str(), mMeta.GetNumberName().c_str());
+
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- std::vector<int> indexes = mMeta.GetNumber().GetDimInfo().GetValidIndexAll();
+ std::vector<int> indexes = mMeta.GetNumberDimInfo().GetValidIndexAll();
if (indexes.size() != 1) {
LOGE("Invalid dim size. It should be 1");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
// mNumberOfObjects is set again if INFERENCE_BOX_DECODING_TYPE_BYPASS.
// Otherwise it is set already within ctor.
mNumberOfOjects = mTensorBuffer.getValue<int>(
- mMeta.GetNumber().GetName(), indexes[0]);
+ mMeta.GetNumberName(), indexes[0]);
} else {
- if (mBoxInfo.GetDecodeInfo().IsAnchorBoxEmpty()) {
+ if (mMeta.GetBoxDecodeInfo().IsAnchorBoxEmpty()) {
LOGE("Anchor boxes are required but empty.");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
// Reads the score stored at flat index idx of the score tensor, passes it through
// PostProcess::sigmoid() when the score type is INFERENCE_SCORE_TYPE_SIGMOID, and
// returns 0.0f if the result is below the metadata score threshold (otherwise the
// score itself).
float ObjectDecoder::decodeScore(int idx)
{
- float score = mTensorBuffer.getValue<float>(mScoreInfo.GetName(), idx);
- if (mScoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+ float score = mTensorBuffer.getValue<float>(mMeta.GetScoreName(), idx);
+ if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
score = PostProcess::sigmoid(score);
}
- return score < mScoreInfo.GetThresHold() ? 0.0f : score;
+ return score < mMeta.GetScoreThreshold() ? 0.0f : score;
}
// Builds one Box from the box tensor.
// Four values are read at idx * mBoxOffset + GetBoxOrder()[k] (k = 0..3); they are
// converted to center form when the box type is ORIGIN_LEFTTOP and divided by
// mScaleW / mScaleH when the coordinate type is PIXEL. The Box index is the label
// argument when no label tensor name is configured, otherwise the value read from
// the label tensor at idx.
// NOTE(review): this is a patch excerpt; parts of the conversion code and the
// return statement are not visible here.
Box ObjectDecoder::decodeBox(int idx, float score, int label)
{
// assume type is (cx,cy,w,h)
// left or cx
- float cx = mTensorBuffer.getValue<float>(mBoxInfo.GetName(),
- idx * mBoxOffset + mBoxInfo.GetOrder()[0]);
+ float cx = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
+ idx * mBoxOffset + mMeta.GetBoxOrder()[0]);
// top or cy
- float cy = mTensorBuffer.getValue<float>(mBoxInfo.GetName(),
- idx * mBoxOffset + mBoxInfo.GetOrder()[1]);
+ float cy = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
+ idx * mBoxOffset + mMeta.GetBoxOrder()[1]);
// right or width
- float cWidth = mTensorBuffer.getValue<float>(mBoxInfo.GetName(),
- idx * mBoxOffset + mBoxInfo.GetOrder()[2]);
+ float cWidth = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
+ idx * mBoxOffset + mMeta.GetBoxOrder()[2]);
// bottom or height
- float cHeight = mTensorBuffer.getValue<float>(mBoxInfo.GetName(),
- idx * mBoxOffset + mBoxInfo.GetOrder()[3]);
+ float cHeight = mTensorBuffer.getValue<float>(mMeta.GetBoxName(),
+ idx * mBoxOffset + mMeta.GetBoxOrder()[3]);
// convert type to ORIGIN_CENTER if ORIGIN_LEFTTOP
- if (mBoxInfo.GetType() == INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP) {
+ if (mMeta.GetBoxType() == INFERENCE_BOX_TYPE_ORIGIN_LEFTTOP) {
float tmpCx = cx;
float tmpCy = cy;
cx = (cx + cWidth) * 0.5f; // (left + right)/2
}
// convert coordinate to RATIO if PIXEL
// NOTE(review): the removed line queried the box info (mBoxInfo.GetCoordinate()),
// but the replacement calls GetScoreCoordinate() while still comparing against an
// INFERENCE_BOX_COORDINATE_TYPE_* value. Confirm this accessor really forwards to
// the box coordinate; if so, a name such as GetBoxCoordinate() would be clearer.
- if (mBoxInfo.GetCoordinate() == INFERENCE_BOX_COORDINATE_TYPE_PIXEL) {
+ if (mMeta.GetScoreCoordinate() == INFERENCE_BOX_COORDINATE_TYPE_PIXEL) {
cx /= mScaleW;
cy /= mScaleH;
cWidth /= mScaleW;
}
Box box = {
- .index = mMeta.GetLabel().GetName().empty() ?
+ .index = mMeta.GetLabelName().empty() ?
label :
- mTensorBuffer.getValue<int>(mMeta.GetLabel().GetName(), idx),
+ mTensorBuffer.getValue<int>(mMeta.GetLabelName(), idx),
.score = score,
.location = cv::Rect2f(cx, cy, cWidth, cHeight)
};
// location coordinate of box, the output of decodeBox(), is relative between 0 ~ 1
Box box = decodeBox(anchorIdx, score, idx);
- if (mBoxInfo.GetDecodeInfo().IsFixedAnchorSize()) {
+ if (mMeta.GetBoxDecodeInfo().IsFixedAnchorSize()) {
box.location.x += anchor.x;
box.location.y += anchor.y;
} else {
- box.location.x = box.location.x / mBoxInfo.GetDecodeInfo().GetAnchorXscale() *
+ box.location.x = box.location.x / mMeta.GetBoxDecodeInfo().GetAnchorXscale() *
anchor.width + anchor.x;
- box.location.y = box.location.y / mBoxInfo.GetDecodeInfo().GetAnchorYscale() *
+ box.location.y = box.location.y / mMeta.GetBoxDecodeInfo().GetAnchorYscale() *
anchor.height + anchor.y;
}
- if (mBoxInfo.GetDecodeInfo().IsExponentialBoxScale()) {
+ if (mMeta.GetBoxDecodeInfo().IsExponentialBoxScale()) {
box.location.width = anchor.width *
- std::exp(box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale());
+ std::exp(box.location.width / mMeta.GetBoxDecodeInfo().GetAnchorWscale());
box.location.height = anchor.height *
- std::exp(box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale());
+ std::exp(box.location.height / mMeta.GetBoxDecodeInfo().GetAnchorHscale());
} else {
box.location.width = anchor.width *
- box.location.width/mBoxInfo.GetDecodeInfo().GetAnchorWscale();
+ box.location.width / mMeta.GetBoxDecodeInfo().GetAnchorWscale();
box.location.height = anchor.height *
- box.location.height/mBoxInfo.GetDecodeInfo().GetAnchorHscale();
+ box.location.height / mMeta.GetBoxDecodeInfo().GetAnchorHscale();
}
return box;
int ret = MEDIA_VISION_ERROR_NONE;
for (int idx = 0; idx < mNumberOfOjects; ++idx) {
- if (mBoxInfo.GetDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
+ if (mMeta.GetBoxDecodingType() == INFERENCE_BOX_DECODING_TYPE_BYPASS) {
float score = decodeScore(idx);
if (score <= 0.0f)
continue;
int anchorIdx = -1;
Boxes boxes;
- for (auto& anchorBox : mBoxInfo.GetDecodeInfo().GetAnchorBoxAll()) {
+ for (auto& anchorBox : mMeta.GetBoxDecodeInfo().GetAnchorBoxAll()) {
anchorIdx++;
float score = decodeScore(anchorIdx * mNumberOfOjects + idx);
if (!boxList.empty()) {
PostProcess postProc;
ret = postProc.Nms(boxList,
- mBoxInfo.GetDecodeInfo().GetNmsMode(),
- mBoxInfo.GetDecodeInfo().GetNmsIouThreshold(),
+ mMeta.GetBoxDecodeInfo().GetNmsMode(),
+ mMeta.GetBoxDecodeInfo().GetNmsIouThreshold(),
mResultBoxes);
if (ret != MEDIA_VISION_ERROR_NONE) {
LOGE("Fail to non-maximum suppression[%d]", ret);
score(),
box(),
label(),
+ number(),
landmark(),
offsetVec(),
dispVecs(),
name(),
dimInfo(),
threshold(0.0),
- type(INFERENCE_SCORE_TYPE_NORMAL),
topNumber(1),
+ type(INFERENCE_SCORE_TYPE_NORMAL),
deQuantization(nullptr)
{
// Score type
return MEDIA_VISION_ERROR_NONE;
}
// Appends a copy of the given anchor rectangle to anchorBoxes.
- void BoxInfo::DecodeInfo::AddAnchorBox(cv::Rect2f& anchor)
+ void DecodeInfo::AddAnchorBox(cv::Rect2f& anchor)
{
anchorBoxes.push_back(anchor);
}
// Removes every stored anchor box.
- void BoxInfo::DecodeInfo::ClearAnchorBox()
+ void DecodeInfo::ClearAnchorBox()
{
anchorBoxes.clear();
}
// Returns a mutable reference to the internal anchor box list (no copy is made).
- std::vector<cv::Rect2f>& BoxInfo::DecodeInfo::GetAnchorBoxAll()
+ std::vector<cv::Rect2f>& DecodeInfo::GetAnchorBoxAll()
{
return anchorBoxes;
}
// Returns true when no anchor box is stored.
- bool BoxInfo::DecodeInfo::IsAnchorBoxEmpty()
+ bool DecodeInfo::IsAnchorBoxEmpty()
{
return anchorBoxes.empty();
}
}
// Looks up the "anchor" object member of root.
// NOTE(review): this is a patch excerpt; the parsing steps between the lookup and
// the MEDIA_VISION_ERROR_NONE return below are not visible here.
- int BoxInfo::DecodeInfo::ParseAnchorParam(JsonObject *root)
+ int DecodeInfo::ParseAnchorParam(JsonObject *root)
{
JsonObject *object = json_object_get_object_member(root, "anchor") ;
return MEDIA_VISION_ERROR_NONE;
}
// Linearly interpolates between min and max: index 0 yields min and
// index == maxStride - 1 yields max.
// The 1.0 literal makes the intermediate arithmetic double precision before the
// result is narrowed back to float.
// NOTE(review): when maxStride == 1 the divisor (maxStride - 1.0f) is zero, so the
// result is NaN (index 0) or infinity. Confirm callers never pass maxStride == 1,
// or special-case it.
- float BoxInfo::DecodeInfo::CalculateScale(float min, float max, int index, int maxStride)
+ float DecodeInfo::CalculateScale(float min, float max, int index, int maxStride)
{
return min + (max - min) * 1.0 * index / (maxStride - 1.0f);
}
// Returns the isFixedAnchorSize flag of the anchor parameters.
// NOTE(review): the return statement ends with a doubled ';' (a harmless empty
// statement) that could be cleaned up.
- bool BoxInfo::DecodeInfo::IsFixedAnchorSize()
+ bool DecodeInfo::IsFixedAnchorSize()
{
return this->anchorParam.isFixedAnchorSize;;
}
// Returns the isExponentialBoxScale flag of the anchor parameters.
- bool BoxInfo::DecodeInfo::IsExponentialBoxScale()
+ bool DecodeInfo::IsExponentialBoxScale()
{
return this->anchorParam.isExponentialBoxScale;
}
// Returns the xScale value of the anchor parameters.
- float BoxInfo::DecodeInfo::GetAnchorXscale()
+ float DecodeInfo::GetAnchorXscale()
{
return this->anchorParam.xScale;
}
// Returns the yScale value of the anchor parameters.
- float BoxInfo::DecodeInfo::GetAnchorYscale()
+ float DecodeInfo::GetAnchorYscale()
{
return this->anchorParam.yScale;
}
// Returns the wScale value of the anchor parameters.
- float BoxInfo::DecodeInfo::GetAnchorWscale()
+ float DecodeInfo::GetAnchorWscale()
{
return this->anchorParam.wScale;
}
// Returns the hScale value of the anchor parameters.
- float BoxInfo::DecodeInfo::GetAnchorHscale()
+ float DecodeInfo::GetAnchorHscale()
{
return this->anchorParam.hScale;
}
// Starts by rejecting anchor parameters whose strides or aspectRatios list is
// empty (an error is logged).
// NOTE(review): this is a patch excerpt and it is not clear whether lines are
// elided between the LOGE and the return below. If the two are adjacent in the
// real file, invalid parameters would be reported as MEDIA_VISION_ERROR_NONE —
// confirm against the full source.
- int BoxInfo::DecodeInfo::GenerateAnchor()
+ int DecodeInfo::GenerateAnchor()
{
- //BoxInfo::DecodeInfo& decodeInfo = box.GetDecodeInfo();
-
if (this->anchorParam.strides.empty() ||
this->anchorParam.aspectRatios.empty()) {
LOGE("Invalid anchor parameters");
return MEDIA_VISION_ERROR_NONE;
}
// Treats the "nms" member as optional: when root has no such member this logs an
// informational message and returns MEDIA_VISION_ERROR_NONE.
// NOTE(review): the rest of the body is not visible in this patch excerpt.
- int BoxInfo::DecodeInfo::ParseNms(JsonObject *root)
+ int DecodeInfo::ParseNms(JsonObject *root)
{
if (!json_object_has_member(root, "nms")) {
LOGI("nms is empty. skip it");
return MEDIA_VISION_ERROR_NONE;
}
// Returns the mode field of the NMS parameters.
- int BoxInfo::DecodeInfo::GetNmsMode()
+ int DecodeInfo::GetNmsMode()
{
return this->nmsParam.mode;
}
// Returns the iouThreshold field of the NMS parameters.
- float BoxInfo::DecodeInfo::GetNmsIouThreshold()
+ float DecodeInfo::GetNmsIouThreshold()
{
return this->nmsParam.iouThreshold;
}
// Treats the "rotate" member as optional: when root has no such member this logs
// an informational message and returns MEDIA_VISION_ERROR_NONE.
// NOTE(review): the rest of the body is not visible in this patch excerpt.
- int BoxInfo::DecodeInfo::ParseRotate(JsonObject *root)
+ int DecodeInfo::ParseRotate(JsonObject *root)
{
if (!json_object_has_member(root, "rotate")) {
LOGI("rotate is empty. skip it");
return MEDIA_VISION_ERROR_NONE;
}
// Returns the startPointIndex field of the rotation parameters.
- int BoxInfo::DecodeInfo::GetRotStartPointIndex()
+ int DecodeInfo::GetRotStartPointIndex()
{
return this->rotParam.startPointIndex;
}
// Returns the endPointIndex field of the rotation parameters.
- int BoxInfo::DecodeInfo::GetRotEndPointIndex()
+ int DecodeInfo::GetRotEndPointIndex()
{
return this->rotParam.endPointIndex;
}
// Returns the baseAngle field of the rotation parameters.
- float BoxInfo::DecodeInfo::GetBaseAngle()
+ float DecodeInfo::GetBaseAngle()
{
return this->rotParam.baseAngle;
}
// Returns the mode field of the ROI option parameters.
- int BoxInfo::DecodeInfo::GetRoiMode()
+ int DecodeInfo::GetRoiMode()
{
return this->roiOptParam.mode;
}
// Returns the startPointIndex field of the ROI option parameters.
- int BoxInfo::DecodeInfo::GetRoiStartPointIndex()
+ int DecodeInfo::GetRoiStartPointIndex()
{
return this->roiOptParam.startPointIndex;
}
// Returns the endPointIndex field of the ROI option parameters.
- int BoxInfo::DecodeInfo::GetRoiEndPointIndex()
+ int DecodeInfo::GetRoiEndPointIndex()
{
return this->roiOptParam.endPointIndex;
}
// Returns the centerPointIndex field of the ROI option parameters.
- int BoxInfo::DecodeInfo::GetRoiCenterPointIndex()
+ int DecodeInfo::GetRoiCenterPointIndex()
{
return this->roiOptParam.centerPointIndex;
}
// Returns the shiftX field of the ROI option parameters.
- float BoxInfo::DecodeInfo::GetShiftX()
+ float DecodeInfo::GetShiftX()
{
return this->roiOptParam.shiftX;
}
// Returns the shiftY field of the ROI option parameters.
- float BoxInfo::DecodeInfo::GetShiftY()
+ float DecodeInfo::GetShiftY()
{
return this->roiOptParam.shiftY;
}
// Returns the scaleX field of the ROI option parameters.
- float BoxInfo::DecodeInfo::GetScaleX()
+ float DecodeInfo::GetScaleX()
{
return this->roiOptParam.scaleX;
}
// Returns the scaleY field of the ROI option parameters.
- float BoxInfo::DecodeInfo::GetScaleY()
+ float DecodeInfo::GetScaleY()
{
return this->roiOptParam.scaleY;
}
// Treats the "roi" member as optional: when root has no such member this logs an
// informational message and returns MEDIA_VISION_ERROR_NONE.
// NOTE(review): the rest of the body is not visible in this patch excerpt.
- int BoxInfo::DecodeInfo::ParseRoiOption(JsonObject *root)
+ int DecodeInfo::ParseRoiOption(JsonObject *root)
{
if (!json_object_has_member(root, "roi")) {
LOGI("roi is empty. skip it");
return MEDIA_VISION_ERROR_NONE;
}
- ScoreInfo& OutputMetadata::GetScore()
- {
- return score;
- }
-
- BoxInfo& OutputMetadata::GetBox()
- {
- return box;
- }
-
- Label& OutputMetadata::GetLabel()
- {
- return label;
- }
-
- Number& OutputMetadata::GetNumber()
- {
- return number;
- }
-
- Landmark& OutputMetadata::GetLandmark()
- {
- return landmark;
- }
-
- OffsetVec& OutputMetadata::GetOffset()
- {
- return offsetVec;
- }
-
- std::vector<DispVec>& OutputMetadata::GetDispVecAll()
- {
- return dispVecs;
- }
-
- Edge& OutputMetadata::GetEdge()
- {
- return edgeMap;
- }
-
- std::vector<std::pair<int, int>>& Edge::GetEdgesAll()
- {
- return edges;
- }
-
- bool OutputMetadata::IsParsed()
- {
- return parsed;
- }
-
// Default-constructs a Landmark: coordinate defaults to
// INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO and decodingType to
// INFERENCE_LANDMARK_DECODING_TYPE_BYPASS; the remaining listed members are
// value-initialized. The body registers the string-to-enum mapping for the
// supported landmark types, starting with "2D_SINGLE".
// NOTE(review): this is a patch excerpt; part of the initializer list and the rest
// of the body are not visible here.
Landmark::Landmark() :
name(),
dimInfo(),
offset(),
coordinate(INFERENCE_LANDMARK_COORDINATE_TYPE_RATIO),
decodingType(INFERENCE_LANDMARK_DECODING_TYPE_BYPASS),
- decodingInfo()
+ heatMapInfo()
{
supportedLandmarkTypes.insert({"2D_SINGLE", INFERENCE_LANDMARK_TYPE_2D_SINGLE});
return decodingType;
}
// Returns a mutable reference to the heat-map info member. It replaces the former
// GetDecodingInfo() accessor, so callers now reach the heat-map fields directly
// rather than through a nested heatMap member.
- Landmark::DecodeInfo& Landmark::GetDecodingInfo()
+ HeatMapInfo& Landmark::GetHeatMapInfo()
{
- return decodingInfo;
+ return heatMapInfo;
}
// Visible part of the landmark parser: handles the "heatmap" JSON object.
//  - shape_type is resolved through GetSupportedType(); a thrown std::exception is
//    logged and turned into MEDIA_VISION_ERROR_INVALID_OPERATION.
//  - the channel/height/width index fields are taken from the landmark's valid
//    dimension indexes, in C,H,W order for NCHW and H,W,C order otherwise.
//  - nms_radius is optional and stored as float when present.
// NOTE(review): this is a patch excerpt; the opening of the function and the code
// preceding the heatmap handling are not visible here.
int OutputMetadata::ParseLandmark(JsonObject *root)
JsonObject *object = json_object_get_object_member(cObject, "heatmap") ;
try {
- landmark.GetDecodingInfo().heatMap.shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes);
+ landmark.GetHeatMapInfo().shapeType = OutputMetadata::GetSupportedType(object, "shape_type", supportedTensorShapes);
} catch (const std::exception& e) {
LOGE("Invalid %s", e.what());
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
// NOTE(review): heatMapIndexes[0..2] are read below without a visible size check;
// confirm the dim info is guaranteed to provide at least three valid indexes.
std::vector<int> heatMapIndexes = landmark.GetDimInfo().GetValidIndexAll();
- if (landmark.GetDecodingInfo().heatMap.shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
- landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[0];
- landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[1];
- landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[2];
+ if (landmark.GetHeatMapInfo().shapeType == INFERENCE_TENSOR_SHAPE_NCHW) {
+ landmark.GetHeatMapInfo().cIdx = heatMapIndexes[0];
+ landmark.GetHeatMapInfo().hIdx = heatMapIndexes[1];
+ landmark.GetHeatMapInfo().wIdx = heatMapIndexes[2];
} else {
- landmark.GetDecodingInfo().heatMap.hIdx = heatMapIndexes[0];
- landmark.GetDecodingInfo().heatMap.wIdx = heatMapIndexes[1];
- landmark.GetDecodingInfo().heatMap.cIdx = heatMapIndexes[2];
+ landmark.GetHeatMapInfo().hIdx = heatMapIndexes[0];
+ landmark.GetHeatMapInfo().wIdx = heatMapIndexes[1];
+ landmark.GetHeatMapInfo().cIdx = heatMapIndexes[2];
}
if (json_object_has_member(object, "nms_radius")) {
// NOTE(review): "%3.f" prints with minimum width 3 and no decimals; "%.3f" may have
// been intended — confirm before changing the log format.
- landmark.GetDecodingInfo().heatMap.nmsRadius = static_cast<float>(json_object_get_double_member(object, "nms_radius"));
- LOGI("nms is enabled with %3.f", landmark.GetDecodingInfo().heatMap.nmsRadius );
+ landmark.GetHeatMapInfo().nmsRadius = static_cast<float>(json_object_get_double_member(object, "nms_radius"));
+ LOGI("nms is enabled with %3.f", landmark.GetHeatMapInfo().nmsRadius );
}
}
{
LOGI("ENTER");
- Landmark& landmarkInfo = mMeta.GetLandmark();
-
- if (landmarkInfo.GetType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
- landmarkInfo.GetType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
+ if (mMeta.GetLandmarkType() < INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ mMeta.GetLandmarkType() > INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
LOGE("Not supported landmark type");
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
- if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+ if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
LOGI("Skip init");
return MEDIA_VISION_ERROR_NONE;
}
float score, localScore;
int idx;
bool isLocalMax;
- ScoreInfo& scoreInfo = mMeta.GetScore();
mCandidates.clear();
- if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
- landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
+ if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
mCandidates.resize(mHeatMapChannel);
}
for (c = 0; c < mHeatMapChannel; ++c, candidate++) {
isLocalMax = true;
idx = convertXYZtoX(x, y, c);
- score = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
- if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+ score = mTensorBuffer.getValue<float>(mMeta.GetScoreName(), idx);
+ if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
score = PostProcess::sigmoid(score);
}
- if (score < scoreInfo.GetThresHold())
+ if (score < mMeta.GetScoreThreshold())
continue;
- if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
- landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
+ if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
if (score <= candidate->score)
continue;
for (dy = sy; dy < ey; ++dy) {
for (dx = sx; dx < ex; ++dx) {
idx = convertXYZtoX(dx, dy, c);
- localScore = mTensorBuffer.getValue<float>(scoreInfo.GetName(), idx);
- if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+ localScore = mTensorBuffer.getValue<float>(mMeta.GetScoreName(), idx);
+ if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
localScore = PostProcess::sigmoid(localScore);
}
if (localScore > score) {
// Fetches the offset for a landmark from the offset-vector tensor.
//  - When the tensor does not exist, offsetVal is set to (0, 0) and the condition is
//    logged.
//  - Otherwise the landmark's heat-map location and id are converted to a pair of
//    flat indexes; offsetVal.x is read at idx.x and offsetVal.y at idx.y.
//  - An exception raised while reading is logged and reported as
//    MEDIA_VISION_ERROR_INVALID_OPERATION.
// NOTE(review): this is a patch excerpt; the returns on the no-offset and success
// paths are not visible here.
int PoseDecoder::getOffsetValue(LandmarkPoint& landmark, cv::Point2f &offsetVal)
{
- if (!mTensorBuffer.exist(mMeta.GetOffset().GetName())) {
+ if (!mTensorBuffer.exist(mMeta.GetOffsetVecName())) {
offsetVal.x = offsetVal.y = 0.f;
LOGI("No offset value");
LOGI("LEAVE");
cv::Point idx = convertXYZtoXY(landmark.heatMapLoc.x, landmark.heatMapLoc.y, landmark.id);
try {
- offsetVal.x = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.x);
- offsetVal.y = mTensorBuffer.getValue<float>(mMeta.GetOffset().GetName(), idx.y);
+ offsetVal.x = mTensorBuffer.getValue<float>(mMeta.GetOffsetVecName(), idx.x);
+ offsetVal.y = mTensorBuffer.getValue<float>(mMeta.GetOffsetVecName(), idx.y);
} catch (const std::exception& e) {
LOGE("Fail to get value at (%d, %d) from %s",
- idx.x, idx.y, mMeta.GetOffset().GetName().c_str());
+ idx.x, idx.y, mMeta.GetOffsetVecName().c_str());
return MEDIA_VISION_ERROR_INVALID_OPERATION;
}
}
mPoseLandmarks.clear();
- LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false};
- Landmark& landmarkInfo = mMeta.GetLandmark();
- ScoreInfo& scoreInfo = mMeta.GetScore();
+ LandmarkPoint initValue = {0.0f, cv::Point(0,0), cv::Point2f(0.0f, 0.0f), -1, false};
- if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
- landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
+ if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_3D_SINGLE) {
mPoseLandmarks.resize(1);
- if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+ if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
mPoseLandmarks[0].landmarks.resize(mNumberOfLandmarks);
} else {
mPoseLandmarks[0].landmarks.resize(mHeatMapChannel);
}
}
- if (landmarkInfo.GetDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+ if (mMeta.GetLandmarkDecodingType() != INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
while (!mCandidates.empty()) {
LandmarkPoint &root = mCandidates.front();
getIndexToPos(root, scaleWidth, scaleHeight);
- if (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) {
+ if (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE) {
root.valid = true;
mPoseLandmarks[0].landmarks[root.id] = root;
mPoseLandmarks[0].score += root.score;
}
} else {
// multi pose is not supported
- std::vector<int> scoreIndexes = scoreInfo.GetDimInfo().GetValidIndexAll();
- float poseScore = scoreInfo.GetThresHold();
+ std::vector<int> scoreIndexes = mMeta.GetScoreDimInfo().GetValidIndexAll();
+ float poseScore = mMeta.GetScoreThreshold();
if (!scoreIndexes.empty()) {
- poseScore = mTensorBuffer.getValue<float>(scoreInfo.GetName(), scoreIndexes[scoreIndexes[0]]);
- if (scoreInfo.GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+ poseScore = mTensorBuffer.getValue<float>(mMeta.GetScoreName(), scoreIndexes[scoreIndexes[0]]);
+ if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
poseScore = PostProcess::sigmoid(poseScore);
}
- if (poseScore < scoreInfo.GetThresHold()) {
- LOGI("pose score %.4f is lower than %.4f\n[LEAVE]", poseScore, scoreInfo.GetThresHold());
+ if (poseScore < mMeta.GetScoreThreshold()) {
+ LOGI("pose score %.4f is lower than %.4f\n[LEAVE]", poseScore, mMeta.GetScoreThreshold());
return MEDIA_VISION_ERROR_NONE;
}
}
- int landmarkOffset = (landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
- landmarkInfo.GetType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 2 : 3;
- if (landmarkInfo.GetDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
- landmarkOffset = landmarkInfo.GetOffset();
+ int landmarkOffset = (mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_SINGLE ||
+ mMeta.GetLandmarkType() == INFERENCE_LANDMARK_TYPE_2D_MULTI) ? 2 : 3;
+ if (mMeta.GetLandmarkDecodingType() == INFERENCE_LANDMARK_DECODING_TYPE_BYPASS) {
+ landmarkOffset = mMeta.GetLandmarkOffset();
}
for (int idx = 0; idx < mNumberOfLandmarks; ++idx) {
- float px = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset);
- float py = mTensorBuffer.getValue<float>(landmarkInfo.GetName(), idx * landmarkOffset + 1);
+ float px = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset);
+ float py = mTensorBuffer.getValue<float>(mMeta.GetLandmarkName(), idx * landmarkOffset + 1);
mPoseLandmarks[0].landmarks[idx].score = poseScore;
mPoseLandmarks[0].landmarks[idx].heatMapLoc = cv::Point(-1, -1);
LOGI("KeyId: [%d], heatMap: %d, %d", root.id, root.heatMapLoc.x, root.heatMapLoc.y);
LOGI("KeyId: [%d], decoded: %.4f, %.4f, score %.3f", root.id, root.decodedLoc.x, root.decodedLoc.y, root.score);
- int index = static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) - 1;
- for (auto riter = mMeta.GetEdge().GetEdgesAll().rbegin();
- riter != mMeta.GetEdge().GetEdgesAll().rend(); ++riter) {
+ int index = static_cast<int>(mMeta.GetEdges().size()) - 1;
+ for (auto riter = mMeta.GetEdges().rbegin();
+ riter != mMeta.GetEdges().rend(); ++riter) {
int fromKeyId = riter->second;
int toKeyId = riter->first;
}
index = 0;
- for (auto iter = mMeta.GetEdge().GetEdgesAll().begin();
- iter != mMeta.GetEdge().GetEdgesAll().end(); ++iter) {
+ for (auto iter = mMeta.GetEdges().begin();
+ iter != mMeta.GetEdges().end(); ++iter) {
int fromKeyId = iter->first;
int toKeyId = iter->second;
}
int idx = convertXYZtoX(toLandmark.heatMapLoc.x, toLandmark.heatMapLoc.y, toLandmark.id);
- toLandmark.score = mTensorBuffer.getValue<float>(mMeta.GetScore().GetName(), idx);
- if (mMeta.GetScore().GetType() == INFERENCE_SCORE_TYPE_SIGMOID) {
+ toLandmark.score = mTensorBuffer.getValue<float>(mMeta.GetScoreName(), idx);
+ if (mMeta.GetScoreType() == INFERENCE_SCORE_TYPE_SIGMOID) {
toLandmark.score = PostProcess::sigmoid(toLandmark.score);
}
{
LOGI("ENTER");
- LOGI("edge size: %zd", mMeta.GetEdge().GetEdgesAll().size());
+ LOGI("edge size: %zd", mMeta.GetEdges().size());
+
int idxY = index.y * mHeatMapWidth
- * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2;
- idxY += index.x * static_cast<int>(mMeta.GetEdge().GetEdgesAll().size()) * 2 + edgeId;
+ * static_cast<int>(mMeta.GetEdges().size()) * 2;
+
+ idxY += index.x * static_cast<int>(mMeta.GetEdges().size()) * 2 + edgeId;
- int idxX = idxY + static_cast<int>(mMeta.GetEdge().GetEdgesAll().size());
+ int idxX = idxY + static_cast<int>(mMeta.GetEdges().size());
for(auto& dispVec : mMeta.GetDispVecAll()){
if (dispVec.GetType() == type) { // 0: forward
Name: capi-media-vision
Summary: Media Vision library for Tizen Native API
-Version: 0.8.17
+Version: 0.8.18
Release: 1
Group: Multimedia/Framework
License: Apache-2.0 and BSD-3-Clause