2 * Copyright (c) 2022 Samsung Electronics Co., Ltd All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <DecodeInfo.h>
20 using namespace mediavision::inference;
21 using namespace mediavision::inference::box;
23 void DecodeInfo::AddAnchorBox(cv::Rect2f &anchor)
25 anchorBoxes.push_back(anchor);
/*
 * DecodeInfo member taking no arguments and returning nothing.
 * NOTE(review): only the signature is visible in this view; judging by the name it
 * presumably empties anchorBoxes - confirm against the full body.
 */
28 void DecodeInfo::ClearAnchorBox()
/*
 * DecodeInfo member returning a mutable reference to a std::vector<cv::Rect2f>.
 * NOTE(review): only the signature is visible in this view; presumably the returned
 * vector is anchorBoxes - confirm against the full body.
 */
33 std::vector<cv::Rect2f> &DecodeInfo::GetAnchorBoxAll()
38 bool DecodeInfo::IsAnchorBoxEmpty()
40 return anchorBoxes.empty();
43 int DecodeInfo::ParseAnchorParam(JsonObject *root)
45 JsonObject *object = json_object_get_object_member(root, "anchor");
47 anchorParam.mode = static_cast<int>(json_object_get_int_member(object, "mode"));
49 if (anchorParam.mode == 0) { // SSD
50 anchorParam.numLayers = static_cast<int>(json_object_get_int_member(object, "num_layers"));
52 anchorParam.minScale = static_cast<float>(json_object_get_double_member(object, "min_scale"));
53 anchorParam.maxScale = static_cast<float>(json_object_get_double_member(object, "max_scale"));
55 anchorParam.isReduceBoxedInLowestLayer =
56 static_cast<bool>(json_object_get_boolean_member(object, "reduce_boxed_in_lowest_layer"));
57 anchorParam.interpolatedScaleAspectRatio =
58 static_cast<float>(json_object_get_double_member(object, "interpolated_scale_aspect_ratio"));
59 anchorParam.isFixedAnchorSize = static_cast<bool>(json_object_get_boolean_member(object, "fixed_anchor_size"));
60 anchorParam.isExponentialBoxScale =
61 static_cast<bool>(json_object_get_boolean_member(object, "exponential_box_scale"));
63 anchorParam.xScale = static_cast<float>(json_object_get_double_member(object, "x_scale"));
64 anchorParam.yScale = static_cast<float>(json_object_get_double_member(object, "y_scale"));
65 anchorParam.wScale = static_cast<float>(json_object_get_double_member(object, "w_scale"));
66 anchorParam.hScale = static_cast<float>(json_object_get_double_member(object, "h_scale"));
68 JsonArray *array = json_object_get_array_member(object, "aspect_ratios");
69 auto elements = json_array_get_length(array);
70 for (unsigned int elem2 = 0; elem2 < elements; ++elem2) {
71 auto aspectRatio = static_cast<float>(json_array_get_double_element(array, elem2));
72 anchorParam.aspectRatios.push_back(aspectRatio);
73 LOGI("aspectRatio: %.4f", aspectRatio);
75 } else if (anchorParam.mode == 1) { // Yolo
76 anchorParam.offsetAnchors = static_cast<int>(json_object_get_int_member(object, "offset_anchors"));
77 JsonArray *xScales = json_object_get_array_member(object, "x_scales");
78 JsonArray *yScales = json_object_get_array_member(object, "y_scales");
79 unsigned int xElements2 = json_array_get_length(xScales);
80 unsigned int yElements2 = json_array_get_length(yScales);
81 if (xElements2 != yElements2) {
82 LOGE("Invalid x and y scales. They should be the same size");
83 return MEDIA_VISION_ERROR_INVALID_OPERATION;
86 std::vector<double> xScale_;
87 std::vector<double> yScale_;
88 for (unsigned int arrayElem2 = 0; arrayElem2 < xElements2; ++arrayElem2) {
89 auto xScale = static_cast<double>(json_array_get_double_element(xScales, arrayElem2));
90 auto yScale = static_cast<double>(json_array_get_double_element(yScales, arrayElem2));
91 LOGI("xScale:%lf, yScale:%lf", xScale, yScale);
92 xScale_.push_back(xScale);
93 yScale_.push_back(yScale);
95 anchorParam.vxScales = xScale_;
96 anchorParam.vyScales = yScale_;
99 LOGE("Invalid anchor mode [%d]", anchorParam.mode);
100 return MEDIA_VISION_ERROR_INVALID_PARAMETER;
103 anchorParam.inputSizeHeight = static_cast<int>(json_object_get_int_member(object, "input_size_height"));
104 anchorParam.inputSizeWidth = static_cast<int>(json_object_get_int_member(object, "input_size_width"));
105 anchorParam.anchorOffsetX = static_cast<float>(json_object_get_double_member(object, "anchor_offset_x"));
106 anchorParam.anchorOffsetY = static_cast<float>(json_object_get_double_member(object, "anchor_offset_y"));
108 JsonArray *array = json_object_get_array_member(object, "strides");
109 unsigned int elements2 = json_array_get_length(array);
110 for (unsigned int elem2 = 0; elem2 < elements2; ++elem2) {
111 auto stride = static_cast<int>(json_array_get_int_element(array, elem2));
112 anchorParam.strides.push_back(stride);
113 LOGI("stride: %d", stride);
116 return MEDIA_VISION_ERROR_NONE;
119 float DecodeInfo::CalculateScale(float min, float max, int index, int maxStride)
121 return min + (max - min) * 1.0 * index / (maxStride - 1.0f);
124 bool DecodeInfo::IsFixedAnchorSize()
126 return anchorParam.isFixedAnchorSize;
129 bool DecodeInfo::IsExponentialBoxScale()
131 return anchorParam.isExponentialBoxScale;
134 float DecodeInfo::GetAnchorXscale()
136 return anchorParam.xScale;
139 float DecodeInfo::GetAnchorYscale()
141 return anchorParam.yScale;
144 float DecodeInfo::GetAnchorWscale()
146 return anchorParam.wScale;
149 float DecodeInfo::GetAnchorHscale()
151 return anchorParam.hScale;
154 int DecodeInfo::GenerateAnchor()
156 if (anchorParam.strides.empty() || anchorParam.aspectRatios.empty()) {
157 LOGE("Invalid anchor parameters");
158 return MEDIA_VISION_ERROR_INVALID_OPERATION;
163 while (layerId < anchorParam.numLayers) {
164 std::vector<float> anchorHeight;
165 std::vector<float> anchorWidth;
166 std::vector<float> aspectRatios;
167 std::vector<float> scales;
169 int lastSameStrideLayer = layerId;
170 std::vector<float>::iterator iter1, iter2;
171 while ((lastSameStrideLayer < anchorParam.numLayers) &&
172 (anchorParam.strides[lastSameStrideLayer] == anchorParam.strides[layerId])) {
173 const float scale = CalculateScale(anchorParam.minScale, anchorParam.maxScale, lastSameStrideLayer,
174 anchorParam.strides.size());
176 if (lastSameStrideLayer == 0 && anchorParam.isReduceBoxedInLowestLayer) {
177 aspectRatios.push_back(1.0);
178 aspectRatios.push_back(2.0);
179 aspectRatios.push_back(0.5);
180 scales.push_back(0.1);
181 scales.push_back(scale);
182 scales.push_back(scale);
184 for (iter1 = anchorParam.aspectRatios.begin(); iter1 != anchorParam.aspectRatios.end(); ++iter1) {
185 aspectRatios.push_back((*iter1));
186 scales.push_back(scale);
188 if (anchorParam.interpolatedScaleAspectRatio > 0.0f) {
189 const float scaleNext = lastSameStrideLayer == static_cast<int>(anchorParam.strides.size()) - 1 ?
191 CalculateScale(anchorParam.minScale, anchorParam.maxScale,
192 lastSameStrideLayer + 1, anchorParam.strides.size());
193 scales.push_back(std::sqrt(scale * scaleNext));
194 aspectRatios.push_back(anchorParam.interpolatedScaleAspectRatio);
197 lastSameStrideLayer++;
200 for (iter1 = aspectRatios.begin(), iter2 = scales.begin();
201 (iter1 != aspectRatios.end() && iter2 != scales.end()); ++iter1, ++iter2) {
202 const float ratioSqrts = std::sqrt((*iter1));
203 anchorHeight.push_back((*iter2) / ratioSqrts);
204 anchorWidth.push_back((*iter2) * ratioSqrts);
207 const int stride = anchorParam.strides[layerId];
208 int featureMapHeight = std::ceil(1.0f * anchorParam.inputSizeHeight / stride);
209 int featureMapWidth = std::ceil(1.0f * anchorParam.inputSizeWidth / stride);
211 for (int y = 0; y < featureMapHeight; ++y) {
212 for (int x = 0; x < featureMapWidth; ++x) {
213 for (int anchorId = 0; anchorId < (int) anchorHeight.size(); ++anchorId) {
214 cv::Rect2f anchor = { cv::Point2f { (x + anchorParam.anchorOffsetX) * 1.0f / featureMapWidth,
215 (y + anchorParam.anchorOffsetY) * 1.0f / featureMapHeight },
216 anchorParam.isFixedAnchorSize ?
217 cv::Size2f { 1.0f, 1.0f } :
218 cv::Size2f { anchorWidth[anchorId], anchorWidth[anchorId] } };
219 AddAnchorBox(anchor);
223 layerId = lastSameStrideLayer;
226 if (IsAnchorBoxEmpty()) {
227 LOGE("Anchor boxes are empty");
228 return MEDIA_VISION_ERROR_INVALID_OPERATION;
231 return MEDIA_VISION_ERROR_NONE;
234 int DecodeInfo::ParseNms(JsonObject *root)
236 if (!json_object_has_member(root, "nms")) {
237 LOGI("nms is empty. skip it");
238 return MEDIA_VISION_ERROR_NONE;
241 JsonObject *object = json_object_get_object_member(root, "nms");
243 nmsParam.mode = GetSupportedType(object, "mode", nmsParam.supportedBoxNmsTypes);
244 } catch (const std::exception &e) {
245 LOGE("Invalid %s", e.what());
246 return MEDIA_VISION_ERROR_INVALID_OPERATION;
249 nmsParam.iouThreshold = static_cast<float>(json_object_get_double_member(object, "iou_threshold"));
251 return MEDIA_VISION_ERROR_NONE;
254 int DecodeInfo::GetNmsMode()
256 return nmsParam.mode;
259 float DecodeInfo::GetNmsIouThreshold()
261 return nmsParam.iouThreshold;
264 int DecodeInfo::ParseRotate(JsonObject *root)
266 if (!json_object_has_member(root, "rotate")) {
267 LOGI("rotate is empty. skip it");
268 return MEDIA_VISION_ERROR_NONE;
271 JsonObject *object = json_object_get_object_member(root, "rotate");
272 rotParam.baseAngle = static_cast<float>(json_object_get_double_member(object, "base_angle"));
273 rotParam.startPointIndex = static_cast<int>(json_object_get_int_member(object, "start_point_index"));
274 rotParam.endPointIndex = static_cast<int>(json_object_get_int_member(object, "end_point_index"));
276 return MEDIA_VISION_ERROR_NONE;
279 int DecodeInfo::GetRotStartPointIndex()
281 return rotParam.startPointIndex;
284 int DecodeInfo::GetRotEndPointIndex()
286 return rotParam.endPointIndex;
289 float DecodeInfo::GetBaseAngle()
291 return rotParam.baseAngle;
294 int DecodeInfo::GetRoiMode()
296 return roiOptParam.mode;
299 int DecodeInfo::GetRoiStartPointIndex()
301 return roiOptParam.startPointIndex;
304 int DecodeInfo::GetRoiEndPointIndex()
306 return roiOptParam.endPointIndex;
309 int DecodeInfo::GetRoiCenterPointIndex()
311 return roiOptParam.centerPointIndex;
314 float DecodeInfo::GetShiftX()
316 return roiOptParam.shiftX;
319 float DecodeInfo::GetShiftY()
321 return roiOptParam.shiftY;
324 float DecodeInfo::GetScaleX()
326 return roiOptParam.scaleX;
329 float DecodeInfo::GetScaleY()
331 return roiOptParam.scaleY;
334 int DecodeInfo::ParseRoiOption(JsonObject *root)
336 if (!json_object_has_member(root, "roi")) {
337 LOGI("roi is empty. skip it");
338 return MEDIA_VISION_ERROR_NONE;
341 JsonObject *object = json_object_get_object_member(root, "roi");
342 roiOptParam.startPointIndex = static_cast<int>(json_object_get_int_member(object, "start_point_index"));
343 roiOptParam.endPointIndex = static_cast<int>(json_object_get_int_member(object, "end_point_index"));
344 roiOptParam.centerPointIndex = static_cast<int>(json_object_get_int_member(object, "center_point_index"));
345 roiOptParam.shiftX = static_cast<float>(json_object_get_double_member(object, "shift_x"));
346 roiOptParam.shiftY = static_cast<float>(json_object_get_double_member(object, "shift_y"));
347 roiOptParam.scaleX = static_cast<float>(json_object_get_double_member(object, "scale_x"));
348 roiOptParam.scaleY = static_cast<float>(json_object_get_double_member(object, "scale_y"));
349 roiOptParam.mode = static_cast<int>(json_object_get_int_member(object, "scale_mode"));
351 return MEDIA_VISION_ERROR_NONE;
355 * @ref https://wikidocs.net/163607
357 int DecodeInfo::GenerateYOLOAnchor()
359 constexpr int maxAnchorPerCell = 3;
361 auto anchorIndex = vAnchorBoxes.size();
362 std::vector<cv::Rect2f> cal;
363 auto stride = anchorParam.strides[anchorIndex];
364 auto gridHeight = anchorParam.inputSizeHeight / stride;
365 auto gridWidth = anchorParam.inputSizeWidth / stride;
367 for (int y = 0; y < gridHeight; ++y) {
368 for (int x = 0; x < gridWidth; ++x) {
369 for (int anchorPerCell = 0; anchorPerCell < maxAnchorPerCell; ++anchorPerCell) {
370 cv::Rect2f anchor = { cv::Point2f { (static_cast<float>(x) + anchorParam.anchorOffsetX),
371 (static_cast<float>(y) + anchorParam.anchorOffsetY) },
372 cv::Size2f { anchorParam.vxScales[anchorPerCell] * static_cast<float>(stride),
373 anchorParam.vyScales[anchorPerCell] * static_cast<float>(stride) } };
374 cal.push_back(anchor);
378 anchorParam.totalAnchors += cal.size();
379 vAnchorBoxes.push_back(cal);
382 return MEDIA_VISION_ERROR_NONE;