1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
11 #include <opencv2/imgproc.hpp>
// Names of the network outputs to fetch; predict() passes this list to net.forward().
24 std::vector<String> outNames;
// Runs one forward pass of `net` on `frame` and stores the results in `outs`.
//   net   - network to run.
//   frame - source image; converted to an input blob with blobFromImage().
//   outs  - receives the outputs whose names are listed in outNames.
// Raises Error::StsBadSize ("Input size not specified").
// NOTE(review): the condition guarding that error and the call that feeds `blob`
// into `net` are not visible in this listing - confirm against the full file.
26 void predict(Net& net, const Mat& frame, OutputArrayOfArrays outs)
29 CV_Error(Error::StsBadSize, "Input size not specified");
// Pre-process the frame using the stored scale, size, mean, swapRB and crop settings.
31 blob = blobFromImage(frame, scale, size, mean, swapRB, crop);
34 // Faster-RCNN or R-FCN
// Detected by layer 0 exposing an output named "im_info"; that input is fed a
// 1x3 float row {size.height, size.width, 1.6f}.
// NOTE(review): 1.6f is presumably the scale entry im_info expects - confirm.
35 if (net.getLayer(0)->outputNameToIndex("im_info") != -1)
37 Mat imInfo = (Mat_<float>(1, 3) << size.height, size.width, 1.6f);
38 net.setInput(imInfo, "im_info");
// Fetch only the outputs named in outNames.
40 net.forward(outs, outNames);
// Default constructor: only allocates the Impl. Unlike the other constructors it
// does not initialise the Net base from a model file or an existing network.
44 Model::Model() : impl(new Impl) {}
// Constructs a Model by loading a network with readNet(model, config).
//   model  - forwarded to readNet() as its first argument.
//   config - forwarded to readNet() as its second argument.
// After loading, records the unconnected output layer names and, if the first
// input shape reported for layer 0 has four entries, presets the input size from it.
46 Model::Model(const String& model, const String& config)
47 : Net(readNet(model, config)), impl(new Impl)
// These are the outputs predict() will request from the network.
49 impl->outNames = getUnconnectedOutLayersNames();
50 std::vector<MatShape> inLayerShapes;
51 std::vector<MatShape> outLayerShapes;
// Query the shapes of layer 0, passing an empty MatShape as the network input shape.
52 getLayerShapes(MatShape(), 0, inLayerShapes, outLayerShapes);
// Entry [3] becomes the width and entry [2] the height.
// NOTE(review): assumes an NCHW layout - TODO confirm.
53 if (!inLayerShapes.empty() && inLayerShapes[0].size() == 4)
54 impl->size = Size(inLayerShapes[0][3], inLayerShapes[0][2]);
// Constructs a Model from an existing network (the Net base is copy-constructed
// from `network`).
// Performs the same post-setup as the (model, config) constructor: records the
// unconnected output layer names and, if the first input shape reported for
// layer 0 has four entries, presets the input size from it.
57 Model::Model(const Net& network) : Net(network), impl(new Impl)
// These are the outputs predict() will request from the network.
59 impl->outNames = getUnconnectedOutLayersNames();
60 std::vector<MatShape> inLayerShapes;
61 std::vector<MatShape> outLayerShapes;
// Query the shapes of layer 0, passing an empty MatShape as the network input shape.
62 getLayerShapes(MatShape(), 0, inLayerShapes, outLayerShapes);
// Entry [3] becomes the width and entry [2] the height.
// NOTE(review): assumes an NCHW layout - TODO confirm.
63 if (!inLayerShapes.empty() && inLayerShapes[0].size() == 4)
64 impl->size = Size(inLayerShapes[0][3], inLayerShapes[0][2]);
// Overload of setInputSize taking a Size; returns Model&.
// NOTE(review): the body is not visible in this listing. Presumably it stores
// `size` in impl->size, as the (width, height) overload does - confirm.
67 Model& Model::setInputSize(const Size& size)
// Sets the input size from separate width and height values.
// The stored Size is the one Impl::predict() hands to blobFromImage().
// Returns Model& (the return statement is not visible in this listing).
73 Model& Model::setInputSize(int width, int height)
75 impl->size = Size(width, height);
// Setter taking a Scalar mean; returns Model&.
// NOTE(review): the body is not visible in this listing. Presumably it stores
// `mean` in impl->mean, which Impl::predict() forwards to blobFromImage() - confirm.
79 Model& Model::setInputMean(const Scalar& mean)
// Setter taking a double scale; returns Model&.
// NOTE(review): the body is not visible in this listing. Presumably it stores
// `scale` in impl->scale, which Impl::predict() forwards to blobFromImage() - confirm.
85 Model& Model::setInputScale(double scale)
// Setter taking a bool crop flag; returns Model&.
// NOTE(review): the body is not visible in this listing. Presumably it stores
// `crop` in impl->crop, which Impl::predict() forwards to blobFromImage() - confirm.
91 Model& Model::setInputCrop(bool crop)
// Stores the swapRB flag; Impl::predict() forwards it to blobFromImage().
// Returns Model& (the return statement is not visible in this listing).
97 Model& Model::setInputSwapRB(bool swapRB)
99 impl->swapRB = swapRB;
// Sets several pre-processing parameters in one call.
//   scale, size, mean, swapRB, crop - same names as the arguments Impl::predict()
//   passes to blobFromImage().
// NOTE(review): only the swapRB assignment is visible in this listing; the other
// four parameters are presumably stored in the matching impl fields - confirm.
103 void Model::setInputParams(double scale, const Size& size, const Scalar& mean,
104 bool swapRB, bool crop)
110 impl->swapRB = swapRB;
// Runs the network on `frame` and writes its outputs to `outs`.
// Thin wrapper: passes *this as the network and frame.getMat() as the image to Impl::predict().
113 void Model::predict(InputArray frame, OutputArrayOfArrays outs)
115 impl->predict(*this, frame.getMat(), outs);
// Constructs a ClassificationModel from `model` and `config`.
// All work is delegated to Model(model, config).
// (No ';' after the body: it would be a stray empty declaration.)
118 ClassificationModel::ClassificationModel(const String& model, const String& config)
119 : Model(model, config) {}
// Constructs a ClassificationModel from an existing network.
// All work is delegated to Model(network).
// (No ';' after the body: it would be a stray empty declaration.)
121 ClassificationModel::ClassificationModel(const Net& network) : Model(network) {}
// Classifies `frame` and returns {index of the highest score, that score as float}.
// NOTE(review): the declarations of `conf` and `maxLoc` are not visible in this listing.
123 std::pair<int, float> ClassificationModel::classify(InputArray frame)
125 std::vector<Mat> outs;
126 impl->predict(*this, frame.getMat(), outs);
// Exactly one output blob is expected from the network.
127 CV_Assert(outs.size() == 1);
// reshape(1, 1) views the scores as a single-channel, single-row matrix, so the
// x coordinate of the maximum is the index of the best-scoring entry.
131 minMaxLoc(outs[0].reshape(1, 1), nullptr, &conf, nullptr, &maxLoc);
132 return {maxLoc.x, static_cast<float>(conf)};
// Out-parameter overload: runs classify(frame) and unpacks the returned pair
// into `classId` and `conf`.
135 void ClassificationModel::classify(InputArray frame, int& classId, float& conf)
137 std::tie(classId, conf) = classify(frame);
// Constructs a KeypointsModel from `model` and `config`.
// All work is delegated to Model(model, config).
// (No ';' after the body: it would be a stray empty declaration.)
140 KeypointsModel::KeypointsModel(const String& model, const String& config)
141 : Model(model, config) {}
// Constructs a KeypointsModel from an existing network.
// All work is delegated to Model(network).
// (No ';' after the body: it would be a stray empty declaration.)
143 KeypointsModel::KeypointsModel(const Net& network) : Model(network) {}
// Estimates keypoint locations for `frame`.
//   frame  - input image; size[0] and size[1] are taken as its height and width.
//   thresh - NOTE(review): not referenced in the lines visible in this listing;
//            presumably a minimum probability for accepting a heat-map peak - confirm.
// Returns a std::vector<Point2f>.
// NOTE(review): the statements that fill and return `points`, and the declarations
// of `p`, `prob` and `maxLoc`, are elided in this listing.
145 std::vector<Point2f> KeypointsModel::estimate(InputArray frame, float thresh)
148 int frameHeight = frame.getMat().size[0];
149 int frameWidth = frame.getMat().size[1];
150 std::vector<Mat> outs;
152 impl->predict(*this, frame.getMat(), outs);
// Exactly one output blob is expected from the network.
153 CV_Assert(outs.size() == 1);
154 Mat output = outs[0];
// Dimension 1 of the output gives the number of points (or maps) it carries.
156 const int nPoints = output.size[1];
157 std::vector<Point2f> points;
159 // If output is a map, extract the keypoints
160 if (output.dims == 4)
// Spatial size of each map, taken from output dimensions 2 and 3.
162 int height = output.size[2];
163 int width = output.size[3];
165 // find the position of the keypoints (ignore the background)
// The loop stops one short of nPoints, so the last map is skipped; per the
// comment above, that map is the background.
166 for (int n=0; n < nPoints - 1; n++)
168 // Probability map of corresponding keypoint
// Mat header over the n-th map's data inside `output`.
169 Mat probMap(height, width, CV_32F, output.ptr(0, n));
// Locate the maximum of the map and its position.
174 minMaxLoc(probMap, NULL, &prob, NULL, &maxLoc);
// Rescale the point from map coordinates to frame coordinates.
178 p.x *= (float)frameWidth / width;
179 p.y *= (float)frameHeight / height;
184 // Otherwise the output is a vector of keypoints and we can just return it
187 for (int n=0; n < nPoints; n++)
// Entry n holds the point's x at index 0 and y at index 1 of the last dimension.
190 p.x = *output.ptr<float>(0, n, 0);
191 p.y = *output.ptr<float>(0, n, 1);
// Constructs a SegmentationModel from `model` and `config`.
// All work is delegated to Model(model, config).
// (No ';' after the body: it would be a stray empty declaration.)
198 SegmentationModel::SegmentationModel(const String& model, const String& config)
199 : Model(model, config) {}
// Constructs a SegmentationModel from an existing network.
// All work is delegated to Model(network).
// (No ';' after the body: it would be a stray empty declaration.)
201 SegmentationModel::SegmentationModel(const Net& network) : Model(network) {}
// Segments `frame` into a per-pixel class mask.
//   frame - input image.
//   mask  - output; created here as a rows x cols CV_8U matrix, where rows and cols
//           are dimensions 2 and 3 of the score blob.
// NOTE(review): the definition of `score` is not visible in this listing
// (presumably outs[0]), nor are any initialisation of `classIds` or the statement
// that writes the winning channel into it - confirm against the full file.
203 void SegmentationModel::segment(InputArray frame, OutputArray mask)
206 std::vector<Mat> outs;
207 impl->predict(*this, frame.getMat(), outs);
// Exactly one output blob is expected from the network.
208 CV_Assert(outs.size() == 1);
// Channel count and spatial size, read from dimensions 1, 2 and 3 of the score blob.
211 const int chns = score.size[1];
212 const int rows = score.size[2];
213 const int cols = score.size[3];
215 mask.create(rows, cols, CV_8U);
216 Mat classIds = mask.getMat();
// Running per-pixel maximum. This Mat is a header over the start of score's own
// buffer, so the first rows*cols floats seed the maximum and are overwritten in
// place as larger scores are found.
218 Mat maxVal(rows, cols, CV_32F, score.data);
// Start at channel 1: the data seeding maxVal is the first plane of `score`.
220 for (int ch = 1; ch < chns; ch++)
222 for (int row = 0; row < rows; row++)
224 const float *ptrScore = score.ptr<float>(0, ch, row);
225 uint8_t *ptrMaxCl = classIds.ptr<uint8_t>(row);
226 float *ptrMaxVal = maxVal.ptr<float>(row);
227 for (int col = 0; col < cols; col++)
// Strictly greater: on a tie the earlier channel's value is kept.
229 if (ptrScore[col] > ptrMaxVal[col])
231 ptrMaxVal[col] = ptrScore[col];
// Constructs a DetectionModel from `model` and `config`.
// All work is delegated to Model(model, config).
// (No ';' after the body: it would be a stray empty declaration.)
239 DetectionModel::DetectionModel(const String& model, const String& config)
240 : Model(model, config) {}
// Constructs a DetectionModel from an existing network.
// All work is delegated to Model(network).
// (No ';' after the body: it would be a stray empty declaration.)
242 DetectionModel::DetectionModel(const Net& network) : Model(network) {}
// Runs the detector on `frame` and returns the detected objects.
//   frame         - input image.
//   classIds      - output: class index of each returned detection.
//   confidences   - output: confidence of each returned detection.
//   boxes         - output: bounding box of each returned detection, clamped to
//                   the frameWidth x frameHeight extent computed below.
//   confThreshold - candidates scoring below this value are rejected.
//   nmsThreshold  - threshold forwarded to NMSBoxes().
// Raises Error::StsNotImplemented ("Unknown output layer type: ...").
// NOTE(review): this listing elides several lines (braces, the statements after the
// threshold checks, some declarations, and the conditions around the error and the
// two result-copy paths at the end) - confirm those against the full file.
244 void DetectionModel::detect(InputArray frame, CV_OUT std::vector<int>& classIds,
245 CV_OUT std::vector<float>& confidences, CV_OUT std::vector<Rect>& boxes,
246 float confThreshold, float nmsThreshold)
248 std::vector<Mat> detections;
249 impl->predict(*this, frame.getMat(), detections);
// Extent used to scale and clamp boxes: the frame size by default...
255 int frameWidth = frame.cols();
256 int frameHeight = frame.rows();
// ...but the network input size when layer 0 exposes an "im_info" output (the
// same check Impl::predict() labels "Faster-RCNN or R-FCN").
257 if (getLayer(0)->outputNameToIndex("im_info") != -1)
259 frameWidth = impl->size.width;
260 frameHeight = impl->size.height;
// The decoding scheme is selected by the type of the last layer returned by getLayerNames().
263 std::vector<String> layerNames = getLayerNames();
264 int lastLayerId = getLayerId(layerNames.back());
265 Ptr<Layer> lastLayer = getLayer(lastLayerId);
// Candidates collected before the final selection step.
267 std::vector<int> predClassIds;
268 std::vector<Rect> predBoxes;
269 std::vector<float> predConf;
270 if (lastLayer->type == "DetectionOutput")
272 // Network produces output blob with a shape 1x1xNx7 where N is a number of
273 // detections and an every detection is a vector of values
274 // [batchId, classId, confidence, left, top, right, bottom]
// NOTE(review): `int i` is compared against size_t size() (signed/unsigned mix);
// the same applies to `j < total()` below.
275 for (int i = 0; i < detections.size(); ++i)
// NOTE(review): reads the blob as a flat float array - assumes continuous
// CV_32F data; TODO confirm.
277 float* data = (float*)detections[i].data;
// Step through the blob seven floats (one detection record) at a time.
278 for (int j = 0; j < detections[i].total(); j += 7)
280 float conf = data[j + 2];
281 if (conf < confThreshold)
// First read the coordinates as absolute values (truncated to int).
284 int left = data[j + 3];
285 int top = data[j + 4];
286 int right = data[j + 5];
287 int bottom = data[j + 6];
288 int width = right - left + 1;
289 int height = bottom - top + 1;
// A box no larger than 2 in either dimension is re-read with its coordinates
// multiplied by the frame extent.
// NOTE(review): presumably such outputs are normalised to [0, 1] - confirm.
291 if (width <= 2 || height <= 2)
293 left = data[j + 3] * frameWidth;
294 top = data[j + 4] * frameHeight;
295 right = data[j + 5] * frameWidth;
296 bottom = data[j + 6] * frameHeight;
297 width = right - left + 1;
298 height = bottom - top + 1;
// Clamp the origin into the extent and keep the box at least 1x1.
301 left = std::max(0, std::min(left, frameWidth - 1));
302 top = std::max(0, std::min(top, frameHeight - 1));
303 width = std::max(1, std::min(width, frameWidth - left));
304 height = std::max(1, std::min(height, frameHeight - top));
305 predBoxes.emplace_back(left, top, width, height);
307 predClassIds.push_back(static_cast<int>(data[j + 1]));
308 predConf.push_back(conf);
312 else if (lastLayer->type == "Region")
// NOTE(review): `int i` is compared against size_t size() (signed/unsigned mix).
314 for (int i = 0; i < detections.size(); ++i)
316 // Network produces output blob with a shape NxC where N is a number of
317 // detected objects and C is a number of classes + 4 where the first 4
318 // numbers are [center_x, center_y, width, height]
// NOTE(review): class scores are read from column 5 onward (see colRange below),
// so column 4 is skipped here and the "classes + 4" wording above looks off by
// one - confirm what column 4 holds.
319 float* data = (float*)detections[i].data;
// `data` advances by one row (cols floats) per iteration, in step with j.
320 for (int j = 0; j < detections[i].rows; ++j, data += detections[i].cols)
323 Mat scores = detections[i].row(j).colRange(5, detections[i].cols);
// The best score in the row and its column give the confidence and class index.
326 minMaxLoc(scores, nullptr, &conf, nullptr, &classIdPoint);
328 if (static_cast<float>(conf) < confThreshold)
// Centre and size are multiplied by the frame extent to get pixel values.
331 int centerX = data[0] * frameWidth;
332 int centerY = data[1] * frameHeight;
333 int width = data[2] * frameWidth;
334 int height = data[3] * frameHeight;
// Convert the centre to a top-left origin, clamp it into the extent, and keep
// the box at least 1x1.
336 int left = std::max(0, std::min(centerX - width / 2, frameWidth - 1));
337 int top = std::max(0, std::min(centerY - height / 2, frameHeight - 1));
338 width = std::max(1, std::min(width, frameWidth - left));
339 height = std::max(1, std::min(height, frameHeight - top));
341 predClassIds.push_back(classIdPoint.x);
342 predConf.push_back(static_cast<float>(conf));
343 predBoxes.emplace_back(left, top, width, height);
// Per the message, reached when the last layer type is not one handled above.
348 CV_Error(Error::StsNotImplemented, "Unknown output layer type: \"" + lastLayer->type + "\"");
// Selection path 1: run NMSBoxes() and copy only the candidates it keeps.
352 std::vector<int> indices;
353 NMSBoxes(predBoxes, predConf, confThreshold, nmsThreshold, indices);
355 boxes.reserve(indices.size());
356 confidences.reserve(indices.size());
357 classIds.reserve(indices.size());
359 for (int idx : indices)
361 boxes.push_back(predBoxes[idx]);
362 confidences.push_back(predConf[idx]);
363 classIds.push_back(predClassIds[idx]);
// Selection path 2: hand every candidate to the caller by moving the vectors.
368 boxes = std::move(predBoxes);
369 classIds = std::move(predClassIds);
370 confidences = std::move(predConf);