1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #ifndef __OPENCV_OBJDETECT_HPP__
44 #define __OPENCV_OBJDETECT_HPP__
46 #include "opencv2/core/core.hpp"
55 /****************************************************************************************\
56 * Haar-like Object Detection functions *
57 \****************************************************************************************/
59 #define CV_HAAR_MAGIC_VAL 0x42500000
60 #define CV_TYPE_NAME_HAAR "opencv-haar-classifier"
62 #define CV_IS_HAAR_CLASSIFIER( haar ) \
64 (((const CvHaarClassifierCascade*)(haar))->flags & CV_MAGIC_MASK)==CV_HAAR_MAGIC_VAL)
66 #define CV_HAAR_FEATURE_MAX 3
68 typedef struct CvHaarFeature
75 } rect[CV_HAAR_FEATURE_MAX];
78 typedef struct CvHaarClassifier
81 CvHaarFeature* haar_feature;
88 typedef struct CvHaarStageClassifier
92 CvHaarClassifier* classifier;
97 } CvHaarStageClassifier;
99 typedef struct CvHidHaarClassifierCascade CvHidHaarClassifierCascade;
101 typedef struct CvHaarClassifierCascade
105 CvSize orig_window_size;
106 CvSize real_window_size;
108 CvHaarStageClassifier* stage_classifier;
109 CvHidHaarClassifierCascade* hid_cascade;
110 } CvHaarClassifierCascade;
112 typedef struct CvAvgComp
118 /* Loads a Haar classifier cascade from the given directory; the caller also passes orig_window_size.
119 Obsolete: convert the cascade to XML and load it with cvLoad instead. */
120 CVAPI(CvHaarClassifierCascade*) cvLoadHaarClassifierCascade(
121 const char* directory, CvSize orig_window_size);
/* Releases a Haar classifier cascade. The cascade is passed by address
   (CvHaarClassifierCascade**).
   NOTE(review): presumably *cascade is cleared on return - confirm against the implementation. */
123 CVAPI(void) cvReleaseHaarClassifierCascade( CvHaarClassifierCascade** cascade );
125 #define CV_HAAR_DO_CANNY_PRUNING 1
126 #define CV_HAAR_SCALE_IMAGE 2
127 #define CV_HAAR_FIND_BIGGEST_OBJECT 4
128 #define CV_HAAR_DO_ROUGH_SEARCH 8
130 //CVAPI(CvSeq*) cvHaarDetectObjectsForROC( const CvArr* image,
131 // CvHaarClassifierCascade* cascade, CvMemStorage* storage,
132 // CvSeq** rejectLevels, CvSeq** levelWeightds,
133 // double scale_factor CV_DEFAULT(1.1),
134 // int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
135 // CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
136 // bool outputRejectLevels = false );
/* Haar cascade object detection; returns a CvSeq*.
   Required arguments: the input image, the cascade and a CvMemStorage.
   Optional arguments and their defaults: scale_factor (1.1), min_neighbors (3),
   flags (0), min_size and max_size (both cvSize(0,0)).
   NOTE(review): flags presumably accepts a combination of the CV_HAAR_* constants
   defined above, and the returned sequence is presumably allocated in `storage` -
   confirm against the implementation. */
139 CVAPI(CvSeq*) cvHaarDetectObjects( const CvArr* image,
140 CvHaarClassifierCascade* cascade, CvMemStorage* storage,
141 double scale_factor CV_DEFAULT(1.1),
142 int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
143 CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)));
145 /* Sets the images for a Haar classifier cascade: sum, sqsum and tilted_sum, plus a scale value.
   NOTE(review): the three arrays are presumably the integral, squared-integral and
   tilted-integral images of the input - confirm against the implementation. */
146 CVAPI(void) cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* cascade,
147 const CvArr* sum, const CvArr* sqsum,
148 const CvArr* tilted_sum, double scale );
150 /* Runs the cascade on the specified window, identified by pt. start_stage defaults to 0.
   NOTE(review): start_stage is presumably the index of the first stage to evaluate; the
   meaning of the int result is not visible in this header - confirm. */
151 CVAPI(int) cvRunHaarClassifierCascade( const CvHaarClassifierCascade* cascade,
152 CvPoint pt, int start_stage CV_DEFAULT(0));
155 /****************************************************************************************\
156 * Latent SVM Object Detection functions *
157 \****************************************************************************************/
159 // DataType: STRUCT position
160 // Structure describes the position of the filter in the feature pyramid
161 // l - level in the feature pyramid
162 // (x, y) - coordinate in level l
163 typedef struct CvLSVMFilterPosition
168 } CvLSVMFilterPosition;
170 // DataType: STRUCT filterObject
171 // Description of the filter, which corresponds to the part of the object
172 // V - ideal (penalty = 0) position of the partial filter
173 // from the root filter position (V_i in the paper)
174 // fineFunction - vector describing the penalty function (d_i in the paper)
175 // pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
176 // FILTER DESCRIPTION
177 // Rectangular map (sizeX x sizeY),
178 // every cell stores feature vector (dimension = p)
179 // H - matrix of feature vectors
180 // to set and get feature vectors (i,j)
181 // used formula H[(j * sizeX + i) * p + k], where
182 // k - component of feature vector in cell (i, j)
183 // END OF FILTER DESCRIPTION
184 typedef struct CvLSVMFilterObject{
185 CvLSVMFilterPosition V;
186 float fineFunction[4];
191 } CvLSVMFilterObject;
193 // data type: STRUCT CvLatentSvmDetector
194 // structure contains internal representation of trained Latent SVM detector
195 // num_filters - total number of filters (root plus part) in model
196 // num_components - number of components in model
197 // num_part_filters - array containing number of part filters for each component
198 // filters - root and part filters for all model components
199 // b - biases for all model components
200 // score_threshold - confidence level threshold
201 typedef struct CvLatentSvmDetector
205 int* num_part_filters;
206 CvLSVMFilterObject** filters;
208 float score_threshold;
212 // data type: STRUCT CvObjectDetection
213 // structure contains the bounding box and confidence level for detected object
214 // rect - bounding box for a detected object
215 // score - confidence level
216 typedef struct CvObjectDetection
222 //////////////// Object Detection using Latent SVM //////////////
226 // load trained detector from a file
229 // CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename);
231 // filename - path to the file containing the parameters of
232 - trained Latent SVM detector
234 // trained Latent SVM detector in internal representation
236 CVAPI(CvLatentSvmDetector*) cvLoadLatentSvmDetector(const char* filename);
239 // release memory allocated for CvLatentSvmDetector structure
242 // void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
244 // detector - CvLatentSvmDetector structure to be released
247 CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
250 // find rectangular regions in the given image that are likely
251 // to contain objects and corresponding confidence levels
254 // CvSeq* cvLatentSvmDetectObjects(IplImage* image,
255 // CvLatentSvmDetector* detector,
256 // CvMemStorage* storage,
257 // float overlap_threshold = 0.5f,
258 // int numThreads = -1);
260 // image - image to detect objects in
261 // detector - Latent SVM detector in internal representation
262 // storage - memory storage to store the resultant sequence
263 // of the object candidate rectangles
264 // overlap_threshold - threshold for the non-maximum suppression algorithm
265 = 0.5f [here will be the reference to original paper]
267 // sequence of detected objects (bounding boxes and confidence levels stored in CvObjectDetection structures)
/* Declaration for the API described in the notes preceding it.
   overlap_threshold defaults to 0.5f and numThreads defaults to -1.
   NOTE(review): numThreads is not covered by those notes; -1 presumably means
   "let the implementation choose" - confirm. */
269 CVAPI(CvSeq*) cvLatentSvmDetectObjects(IplImage* image,
270 CvLatentSvmDetector* detector,
271 CvMemStorage* storage,
272 float overlap_threshold CV_DEFAULT(0.5f),
273 int numThreads CV_DEFAULT(-1));
// C++-only variant of Haar detection: besides the image, cascade and storage it takes
// two vectors by non-const reference (rejectLevels, levelWeightds) and an
// outputRejectLevels switch (default false). The remaining optional arguments carry
// the same defaults as cvHaarDetectObjects (1.1, 3, 0, cvSize(0,0), cvSize(0,0)).
// NOTE(review): the two vectors are presumably filled only when outputRejectLevels
// is true - confirm against the implementation.
278 CV_EXPORTS CvSeq* cvHaarDetectObjectsForROC( const CvArr* image,
279 CvHaarClassifierCascade* cascade, CvMemStorage* storage,
280 std::vector<int>& rejectLevels, std::vector<double>& levelWeightds,
281 double scale_factor CV_DEFAULT(1.1),
282 int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
283 CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
284 bool outputRejectLevels = false );
289 ///////////////////////////// Object Detection ////////////////////////////
292 * This is a class wrapping up the structure CvLatentSvmDetector and functions working with it.
293 * The class goals are:
294 * 1) provide c++ interface;
295 * 2) make it possible to load and detect more than one class (model) unlike CvLatentSvmDetector.
297 class CV_EXPORTS LatentSvmDetector
300 struct CV_EXPORTS ObjectDetection
303 ObjectDetection( const Rect& rect, float score, int classID=-1 );
310 LatentSvmDetector( const vector<string>& filenames, const vector<string>& classNames=vector<string>() );
311 virtual ~LatentSvmDetector();
313 virtual void clear();
314 virtual bool empty() const;
315 bool load( const vector<string>& filenames, const vector<string>& classNames=vector<string>() );
317 virtual void detect( const Mat& image,
318 vector<ObjectDetection>& objectDetections,
319 float overlapThreshold=0.5f,
322 const vector<string>& getClassNames() const;
323 size_t getClassCount() const;
326 vector<CvLatentSvmDetector*> detectors;
327 vector<string> classNames;
330 // class for grouping object candidates, detected by Cascade Classifier, HOG etc.
331 // instance of the class is to be passed to cv::partition (see cxoperations.hpp)
332 class CV_EXPORTS SimilarRects
335 SimilarRects(double _eps) : eps(_eps) {}
336 inline bool operator()(const Rect& r1, const Rect& r2) const
338 double delta = eps*(std::min(r1.width, r2.width) + std::min(r1.height, r2.height))*0.5;
339 return std::abs(r1.x - r2.x) <= delta &&
340 std::abs(r1.y - r2.y) <= delta &&
341 std::abs(r1.x + r1.width - r2.x - r2.width) <= delta &&
342 std::abs(r1.y + r1.height - r2.y - r2.height) <= delta;
// Overload set for grouping rectangles. Every overload takes rectList by non-const
// reference (the first two mark it CV_IN_OUT), a groupThreshold and an eps
// (default 0.2 wherever a default is given).
// The overloads differ in the extra per-rectangle data they carry:
//   - none;
//   - weights (vector<int>&, marked CV_OUT);
//   - optional weights / levelWeights passed as pointers;
//   - rejectLevels and levelWeights passed by reference.
// NOTE(review): eps is presumably the similarity tolerance used by SimilarRects - confirm.
347 CV_EXPORTS void groupRectangles(CV_OUT CV_IN_OUT vector<Rect>& rectList, int groupThreshold, double eps=0.2);
348 CV_EXPORTS_W void groupRectangles(CV_OUT CV_IN_OUT vector<Rect>& rectList, CV_OUT vector<int>& weights, int groupThreshold, double eps=0.2);
349 CV_EXPORTS void groupRectangles( vector<Rect>& rectList, int groupThreshold, double eps, vector<int>* weights, vector<double>* levelWeights );
350 CV_EXPORTS void groupRectangles(vector<Rect>& rectList, vector<int>& rejectLevels,
351 vector<double>& levelWeights, int groupThreshold, double eps=0.2);
// Mean-shift based variant: takes foundWeights and foundScales by reference, with
// detectThreshold defaulting to 0.0 and winDetSize defaulting to Size(64, 128).
352 CV_EXPORTS void groupRectangles_meanshift(vector<Rect>& rectList, vector<double>& foundWeights, vector<double>& foundScales,
353 double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
356 class CV_EXPORTS FeatureEvaluator
359 enum { HAAR = 0, LBP = 1, HOG = 2 };
360 virtual ~FeatureEvaluator();
362 virtual bool read(const FileNode& node);
363 virtual Ptr<FeatureEvaluator> clone() const;
364 virtual int getFeatureType() const;
366 virtual bool setImage(const Mat& img, Size origWinSize);
367 virtual bool setWindow(Point p);
369 virtual double calcOrd(int featureIdx) const;
370 virtual int calcCat(int featureIdx) const;
372 static Ptr<FeatureEvaluator> create(int type);
375 template<> CV_EXPORTS void Ptr<CvHaarClassifierCascade>::delete_obj();
379 CASCADE_DO_CANNY_PRUNING=1,
380 CASCADE_SCALE_IMAGE=2,
381 CASCADE_FIND_BIGGEST_OBJECT=4,
382 CASCADE_DO_ROUGH_SEARCH=8
385 class CV_EXPORTS_W CascadeClassifier
388 CV_WRAP CascadeClassifier();
389 CV_WRAP CascadeClassifier( const string& filename );
390 virtual ~CascadeClassifier();
392 CV_WRAP virtual bool empty() const;
393 CV_WRAP bool load( const string& filename );
394 virtual bool read( const FileNode& node );
395 CV_WRAP virtual void detectMultiScale( const Mat& image,
396 CV_OUT vector<Rect>& objects,
397 double scaleFactor=1.1,
398 int minNeighbors=3, int flags=0,
400 Size maxSize=Size() );
402 CV_WRAP virtual void detectMultiScale( const Mat& image,
403 CV_OUT vector<Rect>& objects,
404 vector<int>& rejectLevels,
405 vector<double>& levelWeights,
406 double scaleFactor=1.1,
407 int minNeighbors=3, int flags=0,
410 bool outputRejectLevels=false );
413 bool isOldFormatCascade() const;
414 virtual Size getOriginalWindowSize() const;
415 int getFeatureType() const;
416 bool setImage( const Mat& );
419 //virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
420 // int stripSize, int yStep, double factor, vector<Rect>& candidates );
422 virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
423 int stripSize, int yStep, double factor, vector<Rect>& candidates,
424 vector<int>& rejectLevels, vector<double>& levelWeights, bool outputRejectLevels=false);
428 enum { DO_CANNY_PRUNING = 1, SCALE_IMAGE = 2,
429 FIND_BIGGEST_OBJECT = 4, DO_ROUGH_SEARCH = 8 };
431 friend class CascadeClassifierInvoker;
433 template<class FEval>
434 friend int predictOrdered( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
436 template<class FEval>
437 friend int predictCategorical( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
439 template<class FEval>
440 friend int predictOrderedStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
442 template<class FEval>
443 friend int predictCategoricalStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
445 bool setImage( Ptr<FeatureEvaluator>& feval, const Mat& image);
446 virtual int runAt( Ptr<FeatureEvaluator>& feval, Point pt, double& weight );
451 struct CV_EXPORTS DTreeNode
454 float threshold; // for ordered features only
459 struct CV_EXPORTS DTree
464 struct CV_EXPORTS Stage
471 bool read(const FileNode &node);
480 vector<Stage> stages;
481 vector<DTree> classifiers;
482 vector<DTreeNode> nodes;
483 vector<float> leaves;
488 Ptr<FeatureEvaluator> featureEvaluator;
489 Ptr<CvHaarClassifierCascade> oldCascade;
492 class CV_EXPORTS MaskGenerator
495 virtual ~MaskGenerator() {}
496 virtual cv::Mat generateMask(const cv::Mat& src)=0;
497 virtual void initializeMask(const cv::Mat& /*src*/) {};
499 void setMaskGenerator(Ptr<MaskGenerator> maskGenerator);
500 Ptr<MaskGenerator> getMaskGenerator();
502 void setFaceDetectionMaskGenerator();
505 Ptr<MaskGenerator> maskGenerator;
509 //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
511 // struct for detection region of interest (ROI)
514 // scale(size) of the bounding box
516 // set of requested locations to be evaluated
517 vector<cv::Point> locations;
518 // vector that will contain confidence values for each location
519 vector<double> confidences;
522 struct CV_EXPORTS_W HOGDescriptor
526 enum { DEFAULT_NLEVELS=64 };
528 CV_WRAP HOGDescriptor() : winSize(64,128), blockSize(16,16), blockStride(8,8),
529 cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1),
530 histogramNormType(HOGDescriptor::L2Hys), L2HysThreshold(0.2), gammaCorrection(true),
531 nlevels(HOGDescriptor::DEFAULT_NLEVELS)
534 CV_WRAP HOGDescriptor(Size _winSize, Size _blockSize, Size _blockStride,
535 Size _cellSize, int _nbins, int _derivAperture=1, double _winSigma=-1,
536 int _histogramNormType=HOGDescriptor::L2Hys,
537 double _L2HysThreshold=0.2, bool _gammaCorrection=false,
538 int _nlevels=HOGDescriptor::DEFAULT_NLEVELS)
539 : winSize(_winSize), blockSize(_blockSize), blockStride(_blockStride), cellSize(_cellSize),
540 nbins(_nbins), derivAperture(_derivAperture), winSigma(_winSigma),
541 histogramNormType(_histogramNormType), L2HysThreshold(_L2HysThreshold),
542 gammaCorrection(_gammaCorrection), nlevels(_nlevels)
545 CV_WRAP HOGDescriptor(const String& filename)
550 HOGDescriptor(const HOGDescriptor& d)
555 virtual ~HOGDescriptor() {}
557 CV_WRAP size_t getDescriptorSize() const;
558 CV_WRAP bool checkDetectorSize() const;
559 CV_WRAP double getWinSigma() const;
561 CV_WRAP virtual void setSVMDetector(InputArray _svmdetector);
563 virtual bool read(FileNode& fn);
564 virtual void write(FileStorage& fs, const String& objname) const;
566 CV_WRAP virtual bool load(const String& filename, const String& objname=String());
567 CV_WRAP virtual void save(const String& filename, const String& objname=String()) const;
568 virtual void copyTo(HOGDescriptor& c) const;
570 CV_WRAP virtual void compute(const Mat& img,
571 CV_OUT vector<float>& descriptors,
572 Size winStride=Size(), Size padding=Size(),
573 const vector<Point>& locations=vector<Point>()) const;
574 //with found weights output
575 CV_WRAP virtual void detect(const Mat& img, CV_OUT vector<Point>& foundLocations,
576 CV_OUT vector<double>& weights,
577 double hitThreshold=0, Size winStride=Size(),
579 const vector<Point>& searchLocations=vector<Point>()) const;
580 //without found weights output
581 virtual void detect(const Mat& img, CV_OUT vector<Point>& foundLocations,
582 double hitThreshold=0, Size winStride=Size(),
584 const vector<Point>& searchLocations=vector<Point>()) const;
585 //with result weights output
586 CV_WRAP virtual void detectMultiScale(const Mat& img, CV_OUT vector<Rect>& foundLocations,
587 CV_OUT vector<double>& foundWeights, double hitThreshold=0,
588 Size winStride=Size(), Size padding=Size(), double scale=1.05,
589 double finalThreshold=2.0,bool useMeanshiftGrouping = false) const;
590 //without found weights output
591 virtual void detectMultiScale(const Mat& img, CV_OUT vector<Rect>& foundLocations,
592 double hitThreshold=0, Size winStride=Size(),
593 Size padding=Size(), double scale=1.05,
594 double finalThreshold=2.0, bool useMeanshiftGrouping = false) const;
596 CV_WRAP virtual void computeGradient(const Mat& img, CV_OUT Mat& grad, CV_OUT Mat& angleOfs,
597 Size paddingTL=Size(), Size paddingBR=Size()) const;
599 CV_WRAP static vector<float> getDefaultPeopleDetector();
600 CV_WRAP static vector<float> getDaimlerPeopleDetector();
602 CV_PROP Size winSize;
603 CV_PROP Size blockSize;
604 CV_PROP Size blockStride;
605 CV_PROP Size cellSize;
607 CV_PROP int derivAperture;
608 CV_PROP double winSigma;
609 CV_PROP int histogramNormType;
610 CV_PROP double L2HysThreshold;
611 CV_PROP bool gammaCorrection;
612 CV_PROP vector<float> svmDetector;
616 // evaluate specified ROI and return confidence value for each location
617 void detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
618 CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
619 double hitThreshold = 0, cv::Size winStride = Size(),
620 cv::Size padding = Size()) const;
622 // evaluate specified ROI and return confidence value for each location in multiple scales
623 void detectMultiScaleROI(const cv::Mat& img,
624 CV_OUT std::vector<cv::Rect>& foundLocations,
625 std::vector<DetectionROI>& locations,
626 double hitThreshold = 0,
627 int groupThreshold = 0) const;
629 // read/parse Dalal's alt model file
630 void readALTModel(std::string modelfile);
631 void groupRectangles(vector<cv::Rect>& rectList, vector<double>& weights, int groupThreshold, double eps) const;
635 CV_EXPORTS_W void findDataMatrix(InputArray image,
636 CV_OUT vector<string>& codes,
637 OutputArray corners=noArray(),
638 OutputArrayOfArrays dmtx=noArray());
639 CV_EXPORTS_W void drawDataMatrixCodes(InputOutputArray image,
640 const vector<string>& codes,
644 /****************************************************************************************\
646 \****************************************************************************************/
648 struct CV_EXPORTS CvDataMatrixCode {
654 CV_EXPORTS std::deque<CvDataMatrixCode> cvFindDataMatrix(CvMat *im);
656 /****************************************************************************************\
658 \****************************************************************************************/
664 using cv::FileStorage;
667 using cv::OutputArrayOfArrays;
673 /// @todo Convert doxy comments to rst
676 * \brief Discriminant feature described by its location and label.
678 struct CV_EXPORTS Feature
682 int label; ///< Quantization
684 Feature() : x(0), y(0), label(0) {}
685 Feature(int x, int y, int label);
687 void read(const FileNode& fn);
688 void write(FileStorage& fs) const;
// Member-wise constructor: copies _x, _y and _label into x, y and label.
691 inline Feature::Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {}
693 struct CV_EXPORTS Template
698 std::vector<Feature> features;
700 void read(const FileNode& fn);
701 void write(FileStorage& fs) const;
705 * \brief Represents a modality operating over an image pyramid.
707 class QuantizedPyramid
710 // Virtual destructor
711 virtual ~QuantizedPyramid() {}
714 * \brief Compute quantized image at current pyramid level for online detection.
716 * \param[out] dst The destination 8-bit image. For each pixel at most one bit is set,
717 * representing its classification.
719 virtual void quantize(Mat& dst) const =0;
722 * \brief Extract most discriminant features at current pyramid level to form a new template.
724 * \param[out] templ The new template.
726 virtual bool extractTemplate(Template& templ) const =0;
729 * \brief Go to the next pyramid level.
731 * \todo Allow pyramid scale factor other than 2
733 virtual void pyrDown() =0;
736 /// Candidate feature with a score
739 Candidate(int x, int y, int label, float score);
741 /// Sort candidates with high score to the front
742 bool operator<(const Candidate& rhs) const
744 return score > rhs.score;
752 * \brief Choose candidate features so that they are not bunched together.
754 * \param[in] candidates Candidate features sorted by score.
755 * \param[out] features Destination vector of selected features.
756 * \param[in] num_features Number of candidates to select.
757 * \param[in] distance Hint for desired distance between features.
759 static void selectScatteredFeatures(const std::vector<Candidate>& candidates,
760 std::vector<Feature>& features,
761 size_t num_features, float distance);
// Initializes member f from (x, y, label) and stores _score in score.
764 inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _score) : f(x, y, label), score(_score) {}
767 * \brief Interface for modalities that plug into the LINE template matching representation.
769 * \todo Max response, to allow optimization of summing (255/MAX) features as uint8
771 class CV_EXPORTS Modality
774 // Virtual destructor
775 virtual ~Modality() {}
778 * \brief Form a quantized image pyramid from a source image.
780 * \param[in] src The source image. Type depends on the modality.
781 * \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero
782 * in quantized image and cannot be extracted as features.
784 Ptr<QuantizedPyramid> process(const Mat& src,
785 const Mat& mask = Mat()) const
787 return processImpl(src, mask);
790 virtual std::string name() const =0;
792 virtual void read(const FileNode& fn) =0;
793 virtual void write(FileStorage& fs) const =0;
796 * \brief Create modality by name.
798 * The following modality types are supported:
802 static Ptr<Modality> create(const std::string& modality_type);
805 * \brief Load a modality from file.
807 static Ptr<Modality> create(const FileNode& fn);
810 // Indirection is because process() has a default parameter.
811 virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
812 const Mat& mask) const =0;
816 * \brief Modality that computes quantized gradient orientations from a color image.
818 class CV_EXPORTS ColorGradient : public Modality
822 * \brief Default constructor. Uses reasonable default parameter values.
827 * \brief Constructor.
829 * \param weak_threshold When quantizing, discard gradients with magnitude less than this.
830 * \param num_features How many features a template must contain.
831 * \param strong_threshold Consider as candidate features only gradients whose norms are
834 ColorGradient(float weak_threshold, size_t num_features, float strong_threshold);
836 virtual std::string name() const;
838 virtual void read(const FileNode& fn);
839 virtual void write(FileStorage& fs) const;
841 float weak_threshold;
843 float strong_threshold;
846 virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
847 const Mat& mask) const;
851 * \brief Modality that computes quantized surface normals from a dense depth map.
853 class CV_EXPORTS DepthNormal : public Modality
857 * \brief Default constructor. Uses reasonable default parameter values.
862 * \brief Constructor.
864 * \param distance_threshold Ignore pixels beyond this distance.
865 * \param difference_threshold When computing normals, ignore contributions of pixels whose
866 * depth difference with the central pixel is above this threshold.
867 * \param num_features How many features a template must contain.
868 * \param extract_threshold Consider as candidate feature only if there are no differing
869 * orientations within a distance of extract_threshold.
871 DepthNormal(int distance_threshold, int difference_threshold, size_t num_features,
872 int extract_threshold);
874 virtual std::string name() const;
876 virtual void read(const FileNode& fn);
877 virtual void write(FileStorage& fs) const;
879 int distance_threshold;
880 int difference_threshold;
882 int extract_threshold;
885 virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
886 const Mat& mask) const;
890 * \brief Debug function to colormap a quantized image for viewing.
892 void colormap(const Mat& quantized, Mat& dst);
895 * \brief Represents a successful template match.
897 struct CV_EXPORTS Match
903 Match(int x, int y, float similarity, const std::string& class_id, int template_id);
905 /// Sort matches with high similarity to the front
906 bool operator<(const Match& rhs) const
908 // Secondarily sort on template_id for the sake of duplicate removal
909 if (similarity != rhs.similarity)
910 return similarity > rhs.similarity;
912 return template_id < rhs.template_id;
915 bool operator==(const Match& rhs) const
917 return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id;
923 std::string class_id;
927 inline Match::Match(int _x, int _y, float _similarity, const std::string& _class_id, int _template_id)
928 : x(_x), y(_y), similarity(_similarity), class_id(_class_id), template_id(_template_id)
933 * \brief Object detector using the LINE template matching algorithm with any set of
936 class CV_EXPORTS Detector
940 * \brief Empty constructor, initialize with read().
945 * \brief Constructor.
947 * \param modalities Modalities to use (color gradients, depth normals, ...).
948 * \param T_pyramid Value of the sampling step T at each pyramid level. The
949 * number of pyramid levels is T_pyramid.size().
951 Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);
954 * \brief Detect objects by template matching.
956 * Matches globally at the lowest pyramid level, then refines locally stepping up the pyramid.
958 * \param sources Source images, one for each modality.
959 * \param threshold Similarity threshold, a percentage between 0 and 100.
960 * \param[out] matches Template matches, sorted by similarity score.
961 * \param class_ids If non-empty, only search for the desired object classes.
962 * \param[out] quantized_images Optionally return vector<Mat> of quantized images.
963 * \param masks The masks for consideration during matching. The masks should be CV_8UC1
964 * where 255 represents a valid pixel. If non-empty, the vector must be
965 * the same size as sources. Each element must be
966 * empty or the same size as its corresponding source.
968 void match(const std::vector<Mat>& sources, float threshold, std::vector<Match>& matches,
969 const std::vector<std::string>& class_ids = std::vector<std::string>(),
970 OutputArrayOfArrays quantized_images = noArray(),
971 const std::vector<Mat>& masks = std::vector<Mat>()) const;
974 * \brief Add new object template.
976 * \param sources Source images, one for each modality.
977 * \param class_id Object class ID.
978 * \param object_mask Mask separating object from background.
979 * \param[out] bounding_box Optionally return bounding box of the extracted features.
981 * \return Template ID, or -1 if failed to extract a valid template.
983 int addTemplate(const std::vector<Mat>& sources, const std::string& class_id,
984 const Mat& object_mask, Rect* bounding_box = NULL);
987 * \brief Add a new object template computed by external means.
989 int addSyntheticTemplate(const std::vector<Template>& templates, const std::string& class_id);
992 * \brief Get the modalities used by this detector.
994 * You are not permitted to add/remove modalities, but you may dynamic_cast them to
997 const std::vector< Ptr<Modality> >& getModalities() const { return modalities; }
1000 * \brief Get sampling step T at pyramid_level.
1002 int getT(int pyramid_level) const { return T_at_level[pyramid_level]; }
1005 * \brief Get number of pyramid levels used by this detector.
1007 int pyramidLevels() const { return pyramid_levels; }
1010 * \brief Get the template pyramid identified by template_id.
1012 * For example, with 2 modalities (Gradient, Normal) and two pyramid levels
1013 * (L0, L1), the order is (GradientL0, NormalL0, GradientL1, NormalL1).
1015 const std::vector<Template>& getTemplates(const std::string& class_id, int template_id) const;
1017 int numTemplates() const;
1018 int numTemplates(const std::string& class_id) const;
1019 int numClasses() const { return static_cast<int>(class_templates.size()); }
1021 std::vector<std::string> classIds() const;
1023 void read(const FileNode& fn);
1024 void write(FileStorage& fs) const;
1026 std::string readClass(const FileNode& fn, const std::string &class_id_override = "");
1027 void writeClass(const std::string& class_id, FileStorage& fs) const;
1029 void readClasses(const std::vector<std::string>& class_ids,
1030 const std::string& format = "templates_%s.yml.gz");
1031 void writeClasses(const std::string& format = "templates_%s.yml.gz") const;
1034 std::vector< Ptr<Modality> > modalities;
1036 std::vector<int> T_at_level;
1038 typedef std::vector<Template> TemplatePyramid;
1039 typedef std::map<std::string, std::vector<TemplatePyramid> > TemplatesMap;
1040 TemplatesMap class_templates;
1042 typedef std::vector<Mat> LinearMemories;
1043 // Indexed as [pyramid level][modality][quantized label]
1044 typedef std::vector< std::vector<LinearMemories> > LinearMemoryPyramid;
1046 void matchClass(const LinearMemoryPyramid& lm_pyramid,
1047 const std::vector<Size>& sizes,
1048 float threshold, std::vector<Match>& matches,
1049 const std::string& class_id,
1050 const std::vector<TemplatePyramid>& template_pyramids) const;
1054 * \brief Factory function for detector using LINE algorithm with color gradients.
1056 * Default parameter settings suitable for VGA images.
1058 CV_EXPORTS Ptr<Detector> getDefaultLINE();
1061 * \brief Factory function for detector using LINE-MOD algorithm with color gradients
1062 * and depth normals.
1064 * Default parameter settings suitable for VGA images.
1066 CV_EXPORTS Ptr<Detector> getDefaultLINEMOD();
1068 } // namespace linemod