"{ height | -1 | Preprocess input image by resizing to a specific height. }"
"{ rgb | | Indicate that model works with RGB input images instead BGR ones. }"
"{ thr | .5 | Confidence threshold. }"
+ "{ nms | .4 | Non-maximum suppression threshold. }"
"{ backend | 0 | Choose one of computation backends: "
"0: automatically (by default), "
"1: Halide language (http://halide-lang.org/), "
using namespace cv;
using namespace dnn;
-float confThreshold;
+float confThreshold, nmsThreshold;
std::vector<std::string> classes;
void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
}
confThreshold = parser.get<float>("thr");
+ nmsThreshold = parser.get<float>("nms");
float scale = parser.get<float>("scale");
Scalar mean = parser.get<Scalar>("mean");
bool swapRB = parser.get<bool>("rgb");
static std::vector<int> outLayers = net.getUnconnectedOutLayers();
static std::string outLayerType = net.getLayer(outLayers[0])->type;
+ std::vector<int> classIds;
+ std::vector<float> confidences;
+ std::vector<Rect> boxes;
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
{
// Network produces output blob with a shape 1x1xNx7 where N is a number of
int top = (int)data[i + 4];
int right = (int)data[i + 5];
int bottom = (int)data[i + 6];
- int classId = (int)(data[i + 1]) - 1; // Skip 0th background class id.
- drawPred(classId, confidence, left, top, right, bottom, frame);
+ int width = right - left + 1;
+ int height = bottom - top + 1;
+ classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
+ boxes.push_back(Rect(left, top, width, height));
+ confidences.push_back(confidence);
}
}
}
int top = (int)(data[i + 4] * frame.rows);
int right = (int)(data[i + 5] * frame.cols);
int bottom = (int)(data[i + 6] * frame.rows);
- int classId = (int)(data[i + 1]) - 1; // Skip 0th background class id.
- drawPred(classId, confidence, left, top, right, bottom, frame);
+ int width = right - left + 1;
+ int height = bottom - top + 1;
+ classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
+ boxes.push_back(Rect(left, top, width, height));
+ confidences.push_back(confidence);
}
}
}
else if (outLayerType == "Region")
{
- std::vector<int> classIds;
- std::vector<float> confidences;
- std::vector<Rect> boxes;
for (size_t i = 0; i < outs.size(); ++i)
{
// Network produces output blob with a shape NxC where N is a number of
}
}
}
- std::vector<int> indices;
- NMSBoxes(boxes, confidences, confThreshold, 0.4f, indices);
- for (size_t i = 0; i < indices.size(); ++i)
- {
- int idx = indices[i];
- Rect box = boxes[idx];
- drawPred(classIds[idx], confidences[idx], box.x, box.y,
- box.x + box.width, box.y + box.height, frame);
- }
}
else
CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
+
+ std::vector<int> indices;
+ NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
+ for (size_t i = 0; i < indices.size(); ++i)
+ {
+ int idx = indices[i];
+ Rect box = boxes[idx];
+ drawPred(classIds[idx], confidences[idx], box.x, box.y,
+ box.x + box.width, box.y + box.height, frame);
+ }
}
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
parser.add_argument('--rgb', action='store_true',
help='Indicate that model works with RGB input images instead BGR ones.')
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
+parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold')
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
help="Choose one of computation backends: "
"%d: automatically (by default), "
net.setPreferableTarget(args.target)
confThreshold = args.thr
+nmsThreshold = args.nms
def getOutputsNames(net):
layersNames = net.getLayerNames()
lastLayerId = net.getLayerId(layerNames[-1])
lastLayer = net.getLayer(lastLayerId)
+ classIds = []
+ confidences = []
+ boxes = []
if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN
# Network produces output blob with a shape 1x1xNx7 where N is a number of
# detections and an every detection is a vector of values
# [batchId, classId, confidence, left, top, right, bottom]
- assert(len(outs) == 1)
- out = outs[0]
- for detection in out[0, 0]:
- confidence = detection[2]
- if confidence > confThreshold:
- left = int(detection[3])
- top = int(detection[4])
- right = int(detection[5])
- bottom = int(detection[6])
- classId = int(detection[1]) - 1 # Skip background label
- drawPred(classId, confidence, left, top, right, bottom)
+ for out in outs:
+ for detection in out[0, 0]:
+ confidence = detection[2]
+ if confidence > confThreshold:
+ left = int(detection[3])
+ top = int(detection[4])
+ right = int(detection[5])
+ bottom = int(detection[6])
+ width = right - left + 1
+ height = bottom - top + 1
+ classIds.append(int(detection[1]) - 1) # Skip background label
+ confidences.append(float(confidence))
+ boxes.append([left, top, width, height])
elif lastLayer.type == 'DetectionOutput':
# Network produces output blob with a shape 1x1xNx7 where N is a number of
# detections and an every detection is a vector of values
# [batchId, classId, confidence, left, top, right, bottom]
- assert(len(outs) == 1)
- out = outs[0]
- for detection in out[0, 0]:
- confidence = detection[2]
- if confidence > confThreshold:
- left = int(detection[3] * frameWidth)
- top = int(detection[4] * frameHeight)
- right = int(detection[5] * frameWidth)
- bottom = int(detection[6] * frameHeight)
- classId = int(detection[1]) - 1 # Skip background label
- drawPred(classId, confidence, left, top, right, bottom)
+ for out in outs:
+ for detection in out[0, 0]:
+ confidence = detection[2]
+ if confidence > confThreshold:
+ left = int(detection[3] * frameWidth)
+ top = int(detection[4] * frameHeight)
+ right = int(detection[5] * frameWidth)
+ bottom = int(detection[6] * frameHeight)
+ width = right - left + 1
+ height = bottom - top + 1
+ classIds.append(int(detection[1]) - 1) # Skip background label
+ confidences.append(float(confidence))
+ boxes.append([left, top, width, height])
elif lastLayer.type == 'Region':
# Network produces output blob with a shape NxC where N is a number of
# detected objects and C is a number of classes + 4 where the first 4
classIds.append(classId)
confidences.append(float(confidence))
boxes.append([left, top, width, height])
- indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, 0.4)
- for i in indices:
- i = i[0]
- box = boxes[i]
- left = box[0]
- top = box[1]
- width = box[2]
- height = box[3]
- drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
+ else:
+ print('Unknown output layer type: ' + lastLayer.type)
+ exit()
+
+ indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
+ for i in indices:
+ i = i[0]
+ box = boxes[i]
+ left = box[0]
+ top = box[1]
+ width = box[2]
+ height = box[3]
+ drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
# Process inputs
winName = 'Deep learning object detection in OpenCV'