modules/objdetect/include/opencv2/objdetect/objdetect.hpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #ifndef __OPENCV_OBJDETECT_HPP__
  44 #define __OPENCV_OBJDETECT_HPP__
  45
  46 #include "opencv2/core/core.hpp"
  47
  48 #ifdef __cplusplus
  49 #include <map>
  50 #include <deque>
  51
  52 extern "C" {
  53 #endif
  54
  55 /****************************************************************************************\
  56 *                         Haar-like Object Detection functions                           *
  57 \****************************************************************************************/
  58 // Constants and a type-check macro for the C Haar cascade structure.
  59 #define CV_HAAR_MAGIC_VAL    0x42500000
  60 #define CV_TYPE_NAME_HAAR    "opencv-haar-classifier"
  61 // True when haar is non-NULL and its flags field, masked with CV_MAGIC_MASK, equals CV_HAAR_MAGIC_VAL.
  62 #define CV_IS_HAAR_CLASSIFIER( haar )                                                    \
  63     ((haar) != NULL &&                                                                   \
  64     (((const CvHaarClassifierCascade*)(haar))->flags & CV_MAGIC_MASK)==CV_HAAR_MAGIC_VAL)
  65 // Length of the rect[] array inside CvHaarFeature.
  66 #define CV_HAAR_FEATURE_MAX  3
  67 // One Haar feature: CV_HAAR_FEATURE_MAX rectangle slots, each with its own weight.
  68 typedef struct CvHaarFeature
  69 {
  70     int tilted; // NOTE(review): presumably non-zero for a rotated (tilted) feature - confirm
  71     struct
  72     {
  73         CvRect r;
  74         float weight;
  75     } rect[CV_HAAR_FEATURE_MAX];
  76 } CvHaarFeature;
  77 // One classifier of a stage; every array is referenced through a raw pointer.
  78 typedef struct CvHaarClassifier
  79 {
  80     int count; // NOTE(review): presumably the number of entries in the arrays below - confirm
  81     CvHaarFeature* haar_feature;
  82     float* threshold;
  83     int* left;
  84     int* right;
  85     float* alpha;
  86 } CvHaarClassifier;
  87 // One stage of the cascade: a threshold plus a pointer to its classifiers.
  88 typedef struct CvHaarStageClassifier
  89 {
  90     int  count; // NOTE(review): presumably the length of the classifier array - confirm
  91     float threshold;
  92     CvHaarClassifier* classifier;
  93     // NOTE(review): next/child/parent look like stage indices linking stages together - confirm
  94     int next;
  95     int child;
  96     int parent;
  97 } CvHaarStageClassifier;
  98 // Forward declaration only: the structure body does not appear in this part of the header.
  99 typedef struct CvHidHaarClassifierCascade CvHidHaarClassifierCascade;
 100 // Top-level cascade object taken by the C Haar detection functions of this header.
 101 typedef struct CvHaarClassifierCascade
 102 {
 103     int  flags; // tested against CV_HAAR_MAGIC_VAL (under CV_MAGIC_MASK) by CV_IS_HAAR_CLASSIFIER
 104     int  count; // NOTE(review): presumably the number of stages in stage_classifier - confirm
 105     CvSize orig_window_size;
 106     CvSize real_window_size;
 107     double scale;
 108     CvHaarStageClassifier* stage_classifier;
 109     CvHidHaarClassifierCascade* hid_cascade; // pointer to the forward-declared CvHidHaarClassifierCascade type
 110 } CvHaarClassifierCascade;
 111 // A rectangle paired with a neighbor count.
 112 typedef struct CvAvgComp
 113 {
 114     CvRect rect;
 115     int neighbors; // NOTE(review): presumably how many raw detections were merged into rect - confirm
 116 } CvAvgComp;
 117
 118 /* Loads a Haar classifier cascade from a directory.
 119    Obsolete: convert your cascade to XML and use cvLoad instead. */
 120 CVAPI(CvHaarClassifierCascade*) cvLoadHaarClassifierCascade(
 121                     const char* directory, CvSize orig_window_size);
 122 // Releases a cascade; the argument is the address of the caller's cascade pointer.
 123 CVAPI(void) cvReleaseHaarClassifierCascade( CvHaarClassifierCascade** cascade );
 124 // Bit flags; the values match the cv::CASCADE_* enumerators declared later in this header.
 125 #define CV_HAAR_DO_CANNY_PRUNING    1
 126 #define CV_HAAR_SCALE_IMAGE         2
 127 #define CV_HAAR_FIND_BIGGEST_OBJECT 4
 128 #define CV_HAAR_DO_ROUGH_SEARCH     8
 129
 130 //CVAPI(CvSeq*) cvHaarDetectObjectsForROC( const CvArr* image,
 131 //                     CvHaarClassifierCascade* cascade, CvMemStorage* storage,
 132 //                     CvSeq** rejectLevels, CvSeq** levelWeightds,
 133 //                     double scale_factor CV_DEFAULT(1.1),
 134 //                     int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
 135 //                     CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
 136 //                     bool outputRejectLevels = false );
 137
 138 // Haar cascade detection on an image. NOTE(review): the returned CvSeq is presumably allocated in storage - confirm.
 139 CVAPI(CvSeq*) cvHaarDetectObjects( const CvArr* image,
 140                      CvHaarClassifierCascade* cascade, CvMemStorage* storage,
 141                      double scale_factor CV_DEFAULT(1.1),
 142                      int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0), // flags: presumably a combination of CV_HAAR_* bits - confirm
 143                      CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)));
 144
 145 /* Sets the sum, sqsum and tilted_sum images and the scale for a Haar classifier cascade. */
 146 CVAPI(void) cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* cascade,
 147                                                 const CvArr* sum, const CvArr* sqsum,
 148                                                 const CvArr* tilted_sum, double scale );
 149
 150 /* Runs the cascade on the specified window (given by pt); start_stage defaults to 0. */
 151 CVAPI(int) cvRunHaarClassifierCascade( const CvHaarClassifierCascade* cascade,
 152                                        CvPoint pt, int start_stage CV_DEFAULT(0));
 153
 154
 155 /****************************************************************************************\
 156 *                         Latent SVM Object Detection functions                          *
 157 \****************************************************************************************/
 158
 159 // DataType: STRUCT CvLSVMFilterPosition
 160 // Describes the position of a filter in the feature pyramid
 161 // l      - level in the feature pyramid
 162 // (x, y) - coordinates within level l
 163 typedef struct CvLSVMFilterPosition
 164 {
 165     int x;
 166     int y;
 167     int l;
 168 } CvLSVMFilterPosition;
 169
 170 // DataType: STRUCT CvLSVMFilterObject
 171 // Description of a filter, which corresponds to a part of the object
 172 // V               - ideal (penalty = 0) position of the partial filter
 173 //                   from the root filter position (V_i in the paper)
 174 // fineFunction    - vector describing the penalty function (d_i in the paper):
 175 //                   pf[0] * x + pf[1] * y + pf[2] * x^2 + pf[3] * y^2
 176 // FILTER DESCRIPTION
 177 //   Rectangular map (sizeX x sizeY),
 178 //   every cell stores a feature vector (dimension = p)
 179 // H               - matrix of feature vectors;
 180 //                   to set and get the feature vector of cell (i,j)
 181 //                   use the formula H[(j * sizeX + i) * p + k], where
 182 //                   k - component of the feature vector in cell (i, j)
 183 // END OF FILTER DESCRIPTION
 184 typedef struct CvLSVMFilterObject{
 185     CvLSVMFilterPosition V;
 186     float fineFunction[4]; // NOTE(review): the comment above used to call this penaltyFunction; the 4 slots match pf[0..3] - confirm
 187     int sizeX;
 188     int sizeY;
 189     int numFeatures; // NOTE(review): presumably the per-cell dimension p used in the H formula - confirm
 190     float *H;
 191 } CvLSVMFilterObject;
 192
 193 // data type: STRUCT CvLatentSvmDetector
 194 // structure contains the internal representation of a trained Latent SVM detector
 195 // num_filters          - total number of filters (root plus part) in the model
 196 // num_components       - number of components in the model
 197 // num_part_filters     - array containing the number of part filters for each component
 198 // filters              - root and part filters for all model components
 199 // b                    - biases for all model components
 200 // score_threshold      - confidence level threshold
 201 typedef struct CvLatentSvmDetector
 202 {
 203     int num_filters;
 204     int num_components;
 205     int* num_part_filters;
 206     CvLSVMFilterObject** filters;
 207     float* b;
 208     float score_threshold;
 209 }
 210 CvLatentSvmDetector;
 211
 212 // data type: STRUCT CvObjectDetection
 213 // structure contains the bounding box and confidence level for a detected object
 214 // rect                 - bounding box for a detected object
 215 // score                - confidence level
 216 typedef struct CvObjectDetection
 217 {
 218     CvRect rect;
 219     float score;
 220 } CvObjectDetection;
 221
 222 //////////////// Object Detection using Latent SVM //////////////
 223
 224
 225 /*
 226 // load a trained detector from a file
 227 //
 228 // API
 229 // CvLatentSvmDetector* cvLoadLatentSvmDetector(const char* filename);
 230 // INPUT
 231 // filename             - path to the file containing the parameters of
 232 //                        the trained Latent SVM detector
 233 // OUTPUT
 234 // trained Latent SVM detector in internal representation
 235 */
 236 CVAPI(CvLatentSvmDetector*) cvLoadLatentSvmDetector(const char* filename);
 237
 238 /*
 239 // release memory allocated for a CvLatentSvmDetector structure
 240 //
 241 // API
 242 // void cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
 243 // INPUT
 244 // detector             - address of the pointer to the CvLatentSvmDetector structure to be released
 245 // OUTPUT               - none (the function returns void)
 246 */
 247 CVAPI(void) cvReleaseLatentSvmDetector(CvLatentSvmDetector** detector);
 248
 249 /*
 250 // find rectangular regions in the given image that are likely
 251 // to contain objects and corresponding confidence levels
 252 //
 253 // API
 254 // CvSeq* cvLatentSvmDetectObjects(IplImage* image,
 255 //                                 CvLatentSvmDetector* detector,
 256 //                                 CvMemStorage* storage,
 257 //                                 float overlap_threshold = 0.5f,
 258 //                                 int numThreads = -1);
 259 // INPUT
 260 // image                - image to detect objects in (non-const pointer in the declaration below)
 261 // detector             - Latent SVM detector in internal representation
 262 // storage              - memory storage to store the resultant sequence
 263 //                        of the object candidate rectangles
 264 // overlap_threshold    - threshold for the non-maximum suppression algorithm, default 0.5f [here will be the reference to original paper]
 265 // numThreads           - defaults to -1; NOTE(review): presumably -1 lets the implementation choose the thread count - confirm
 266 // OUTPUT
 267 // sequence of detected objects (bounding boxes and confidence levels stored in CvObjectDetection structures)
 268 */
 269 CVAPI(CvSeq*) cvLatentSvmDetectObjects(IplImage* image,
 270                                 CvLatentSvmDetector* detector,
 271                                 CvMemStorage* storage,
 272                                 float overlap_threshold CV_DEFAULT(0.5f),
 273                                 int numThreads CV_DEFAULT(-1));
 274
 275 #ifdef __cplusplus
 276 }
 277 // C++-only declaration (it follows the closing brace of the extern "C" block): takes std::vector references and is exported with CV_EXPORTS rather than CVAPI.
 278 CV_EXPORTS CvSeq* cvHaarDetectObjectsForROC( const CvArr* image,
 279                      CvHaarClassifierCascade* cascade, CvMemStorage* storage,
 280                      std::vector<int>& rejectLevels, std::vector<double>& levelWeightds,
 281                      double scale_factor CV_DEFAULT(1.1),
 282                      int min_neighbors CV_DEFAULT(3), int flags CV_DEFAULT(0),
 283                      CvSize min_size CV_DEFAULT(cvSize(0,0)), CvSize max_size CV_DEFAULT(cvSize(0,0)),
 284                      bool outputRejectLevels = false ); // NOTE(review): presumably enables filling rejectLevels/levelWeightds - confirm
 285
 286 namespace cv
 287 {
 288
 289 ///////////////////////////// Object Detection ////////////////////////////
 290
 291 /*
 292  * This class wraps the structure CvLatentSvmDetector and the functions working with it.
 293  * The class goals are:
 294  * 1) provide a C++ interface;
 295  * 2) make it possible to load and detect more than one class (model), unlike CvLatentSvmDetector.
 296  */
 297 class CV_EXPORTS LatentSvmDetector
 298 {
 299 public:
 300     struct CV_EXPORTS ObjectDetection // one detection: bounding box, score and class id
 301     {
 302         ObjectDetection();
 303         ObjectDetection( const Rect& rect, float score, int classID=-1 );
 304         Rect rect;
 305         float score;
 306         int classID; // NOTE(review): presumably an index into getClassNames() - confirm
 307     };
 308
 309     LatentSvmDetector();
 310     LatentSvmDetector( const vector<string>& filenames, const vector<string>& classNames=vector<string>() );
 311     virtual ~LatentSvmDetector();
 312
 313     virtual void clear();
 314     virtual bool empty() const;
 315     bool load( const vector<string>& filenames, const vector<string>& classNames=vector<string>() ); // classNames is optional (defaults to an empty vector)
 316
 317     virtual void detect( const Mat& image,
 318                          vector<ObjectDetection>& objectDetections,
 319                          float overlapThreshold=0.5f,
 320                          int numThreads=-1 );
 321
 322     const vector<string>& getClassNames() const;
 323     size_t getClassCount() const;
 324
 325 private:
 326     vector<CvLatentSvmDetector*> detectors; // raw pointers to the C structs; no copy constructor is declared, so a copy shares them - NOTE(review): confirm ownership in the implementation
 327     vector<string> classNames;
 328 };
 329 // Overload set for grouping rectangles; every overload takes rectList by non-const reference. Only the second overload is exported with CV_EXPORTS_W.
 330 CV_EXPORTS void groupRectangles(CV_OUT CV_IN_OUT vector<Rect>& rectList, int groupThreshold, double eps=0.2);
 331 CV_EXPORTS_W void groupRectangles(CV_OUT CV_IN_OUT vector<Rect>& rectList, CV_OUT vector<int>& weights, int groupThreshold, double eps=0.2);
 332 CV_EXPORTS void groupRectangles( vector<Rect>& rectList, int groupThreshold, double eps, vector<int>* weights, vector<double>* levelWeights ); // pointer outputs; NOTE(review): presumably may be null - confirm
 333 CV_EXPORTS void groupRectangles(vector<Rect>& rectList, vector<int>& rejectLevels,
 334                                 vector<double>& levelWeights, int groupThreshold, double eps=0.2);
 335 CV_EXPORTS void groupRectangles_meanshift(vector<Rect>& rectList, vector<double>& foundWeights, vector<double>& foundScales,
 336                                           double detectThreshold = 0.0, Size winDetSize = Size(64, 128)); // default window size is 64x128
 337
 338 // Polymorphic base class for feature evaluators; concrete evaluators are obtained through create(type).
 339 class CV_EXPORTS FeatureEvaluator
 340 {
 341 public:
 342     enum { HAAR = 0, LBP = 1, HOG = 2 }; // feature type ids; NOTE(review): presumably the values accepted by create() and returned by getFeatureType() - confirm
 343     virtual ~FeatureEvaluator();
 344
 345     virtual bool read(const FileNode& node);
 346     virtual Ptr<FeatureEvaluator> clone() const;
 347     virtual int getFeatureType() const;
 348
 349     virtual bool setImage(const Mat& img, Size origWinSize);
 350     virtual bool setWindow(Point p);
 351
 352     virtual double calcOrd(int featureIdx) const; // NOTE(review): presumably evaluates an ordered (numeric) feature - confirm
 353     virtual int calcCat(int featureIdx) const; // NOTE(review): presumably evaluates a categorical feature - confirm
 354
 355     static Ptr<FeatureEvaluator> create(int type);
 356 };
 357 // Declares an explicit specialization of Ptr<T>::delete_obj for CvHaarClassifierCascade; the definition is not in this header section.
 358 template<> CV_EXPORTS void Ptr<CvHaarClassifierCascade>::delete_obj();
 359 // C++ names for the cascade flag bits; the values are the same as the CV_HAAR_* macros defined earlier in this header.
 360 enum
 361 {
 362     CASCADE_DO_CANNY_PRUNING=1,
 363     CASCADE_SCALE_IMAGE=2,
 364     CASCADE_FIND_BIGGEST_OBJECT=4,
 365     CASCADE_DO_ROUGH_SEARCH=8
 366 };
 367
 368 class CV_EXPORTS_W CascadeClassifier
 369 {
 370 public:
 371     CV_WRAP CascadeClassifier();
 372     CV_WRAP CascadeClassifier( const string& filename );
 373     virtual ~CascadeClassifier();
 374
 375     CV_WRAP virtual bool empty() const;
 376     CV_WRAP bool load( const string& filename );
 377     virtual bool read( const FileNode& node );
 378     CV_WRAP virtual void detectMultiScale( const Mat& image,
 379                                    CV_OUT vector<Rect>& objects,
 380                                    double scaleFactor=1.1,
 381                                    int minNeighbors=3, int flags=0,
 382                                    Size minSize=Size(),
 383                                    Size maxSize=Size() );
 384
 385     CV_WRAP virtual void detectMultiScale( const Mat& image,
 386                                    CV_OUT vector<Rect>& objects,
 387                                    vector<int>& rejectLevels,
 388                                    vector<double>& levelWeights,
 389                                    double scaleFactor=1.1,
 390                                    int minNeighbors=3, int flags=0,
 391                                    Size minSize=Size(),
 392                                    Size maxSize=Size(),
 393                                    bool outputRejectLevels=false );
 394
 395
 396     bool isOldFormatCascade() const;
 397     virtual Size getOriginalWindowSize() const;
 398     int getFeatureType() const;
 399     bool setImage( const Mat& );
 400
 401 protected:
 402     //virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
 403     //                                int stripSize, int yStep, double factor, vector<Rect>& candidates );
 404
 405     virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
 406                                     int stripSize, int yStep, double factor, vector<Rect>& candidates,
 407                                     vector<int>& rejectLevels, vector<double>& levelWeights, bool outputRejectLevels=false);
 408
 409 protected:
 410     enum { BOOST = 0 };
 411     enum { DO_CANNY_PRUNING = 1, SCALE_IMAGE = 2,
 412            FIND_BIGGEST_OBJECT = 4, DO_ROUGH_SEARCH = 8 };
 413
 414     friend class CascadeClassifierInvoker;
 415
 416     template<class FEval>
 417     friend int predictOrdered( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
 418
 419     template<class FEval>
 420     friend int predictCategorical( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
 421
 422     template<class FEval>
 423     friend int predictOrderedStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
 424
 425     template<class FEval>
 426     friend int predictCategoricalStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
 427
 428     bool setImage( Ptr<FeatureEvaluator>& feval, const Mat& image);
 429     virtual int runAt( Ptr<FeatureEvaluator>& feval, Point pt, double& weight );
 430
 431     class Data
 432     {
 433     public:
 434         struct CV_EXPORTS DTreeNode
 435         {
 436             int featureIdx;
 437             float threshold; // for ordered features only
 438             int left;
 439             int right;
 440         };
 441
 442         struct CV_EXPORTS DTree
 443         {
 444             int nodeCount;
 445         };
 446
 447         struct CV_EXPORTS Stage
 448         {
 449             int first;
 450             int ntrees;
 451             float threshold;
 452         };
 453
 454         bool read(const FileNode &node);
 455
 456         bool isStumpBased;
 457
 458         int stageType;
 459         int featureType;
 460         int ncategories;
 461         Size origWinSize;
 462
 463         vector<Stage> stages;
 464         vector<DTree> classifiers;
 465         vector<DTreeNode> nodes;
 466         vector<float> leaves;
 467         vector<int> subsets;
 468     };
 469
 470     Data data;
 471     Ptr<FeatureEvaluator> featureEvaluator;
 472     Ptr<CvHaarClassifierCascade> oldCascade;
 473
 474 public:
 475     class CV_EXPORTS MaskGenerator
 476     {
 477     public:
 478         virtual ~MaskGenerator() {}
 479         virtual cv::Mat generateMask(const cv::Mat& src)=0;
 480         virtual void initializeMask(const cv::Mat& /*src*/) {};
 481     };
 482     void setMaskGenerator(Ptr<MaskGenerator> maskGenerator);
 483     Ptr<MaskGenerator> getMaskGenerator();
 484
 485     void setFaceDetectionMaskGenerator();
 486
 487 protected:
 488     Ptr<MaskGenerator> maskGenerator;
 489 };
 490
 491
 492 //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
 493
 494 // struct for a detection region of interest (ROI)
 495 struct DetectionROI
 496 {
 497    // scale (size) of the bounding box
 498    double scale;
 499    // set of requested locations to be evaluated
 500    vector<cv::Point> locations;
 501    // vector that will contain confidence values for each location
 502    vector<double> confidences;
 503 };
 504
 505 struct CV_EXPORTS_W HOGDescriptor
 506 {
 507 public:
 508     enum { L2Hys=0 };
 509     enum { DEFAULT_NLEVELS=64 };
 510
 511     CV_WRAP HOGDescriptor() : winSize(64,128), blockSize(16,16), blockStride(8,8),
 512         cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1),
 513         histogramNormType(HOGDescriptor::L2Hys), L2HysThreshold(0.2), gammaCorrection(true),
 514         nlevels(HOGDescriptor::DEFAULT_NLEVELS)
 515     {}
 516
 517     CV_WRAP HOGDescriptor(Size _winSize, Size _blockSize, Size _blockStride,
 518                   Size _cellSize, int _nbins, int _derivAperture=1, double _winSigma=-1,
 519                   int _histogramNormType=HOGDescriptor::L2Hys,
 520                   double _L2HysThreshold=0.2, bool _gammaCorrection=false,
 521                   int _nlevels=HOGDescriptor::DEFAULT_NLEVELS)
 522     : winSize(_winSize), blockSize(_blockSize), blockStride(_blockStride), cellSize(_cellSize),
 523     nbins(_nbins), derivAperture(_derivAperture), winSigma(_winSigma),
 524     histogramNormType(_histogramNormType), L2HysThreshold(_L2HysThreshold),
 525     gammaCorrection(_gammaCorrection), nlevels(_nlevels)
 526     {}
 527
 528     CV_WRAP HOGDescriptor(const String& filename)
 529     {
 530         load(filename);
 531     }
 532
 533     HOGDescriptor(const HOGDescriptor& d)
 534     {
 535         d.copyTo(*this);
 536     }
 537
 538     virtual ~HOGDescriptor() {}
 539
 540     CV_WRAP size_t getDescriptorSize() const;
 541     CV_WRAP bool checkDetectorSize() const;
 542     CV_WRAP double getWinSigma() const;
 543
 544     CV_WRAP virtual void setSVMDetector(InputArray _svmdetector);
 545
 546     virtual bool read(FileNode& fn);
 547     virtual void write(FileStorage& fs, const String& objname) const;
 548
 549     CV_WRAP virtual bool load(const String& filename, const String& objname=String());
 550     CV_WRAP virtual void save(const String& filename, const String& objname=String()) const;
 551     virtual void copyTo(HOGDescriptor& c) const;
 552
 553     CV_WRAP virtual void compute(const Mat& img,
 554                          CV_OUT vector<float>& descriptors,
 555                          Size winStride=Size(), Size padding=Size(),
 556                          const vector<Point>& locations=vector<Point>()) const;
 557     //with found weights output
 558     CV_WRAP virtual void detect(const Mat& img, CV_OUT vector<Point>& foundLocations,
 559                         CV_OUT vector<double>& weights,
 560                         double hitThreshold=0, Size winStride=Size(),
 561                         Size padding=Size(),
 562                         const vector<Point>& searchLocations=vector<Point>()) const;
 563     //without found weights output
 564     virtual void detect(const Mat& img, CV_OUT vector<Point>& foundLocations,
 565                         double hitThreshold=0, Size winStride=Size(),
 566                         Size padding=Size(),
 567                         const vector<Point>& searchLocations=vector<Point>()) const;
 568     //with result weights output
 569     CV_WRAP virtual void detectMultiScale(const Mat& img, CV_OUT vector<Rect>& foundLocations,
 570                                   CV_OUT vector<double>& foundWeights, double hitThreshold=0,
 571                                   Size winStride=Size(), Size padding=Size(), double scale=1.05,
 572                                   double finalThreshold=2.0,bool useMeanshiftGrouping = false) const;
 573     //without found weights output
 574     virtual void detectMultiScale(const Mat& img, CV_OUT vector<Rect>& foundLocations,
 575                                   double hitThreshold=0, Size winStride=Size(),
 576                                   Size padding=Size(), double scale=1.05,
 577                                   double finalThreshold=2.0, bool useMeanshiftGrouping = false) const;
 578
 579     CV_WRAP virtual void computeGradient(const Mat& img, CV_OUT Mat& grad, CV_OUT Mat& angleOfs,
 580                                  Size paddingTL=Size(), Size paddingBR=Size()) const;
 581
 582     CV_WRAP static vector<float> getDefaultPeopleDetector();
 583     CV_WRAP static vector<float> getDaimlerPeopleDetector();
 584
 585     CV_PROP Size winSize;
 586     CV_PROP Size blockSize;
 587     CV_PROP Size blockStride;
 588     CV_PROP Size cellSize;
 589     CV_PROP int nbins;
 590     CV_PROP int derivAperture;
 591     CV_PROP double winSigma;
 592     CV_PROP int histogramNormType;
 593     CV_PROP double L2HysThreshold;
 594     CV_PROP bool gammaCorrection;
 595     CV_PROP vector<float> svmDetector;
 596     CV_PROP int nlevels;
 597
 598
 599    // evaluate specified ROI and return confidence value for each location
 600    void detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
 601                                    CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
 602                                    double hitThreshold = 0, cv::Size winStride = Size(),
 603                                    cv::Size padding = Size()) const;
 604
 605    // evaluate specified ROI and return confidence value for each location in multiple scales
 606    void detectMultiScaleROI(const cv::Mat& img,
 607                                                        CV_OUT std::vector<cv::Rect>& foundLocations,
 608                                                        std::vector<DetectionROI>& locations,
 609                                                        double hitThreshold = 0,
 610                                                        int groupThreshold = 0) const;
 611
 612    // read/parse Dalal's alt model file
 613    void readALTModel(std::string modelfile);
 614 };
 615
 616 // Data Matrix helpers: findDataMatrix outputs the decoded strings in codes (corners and dmtx are optional outputs); drawDataMatrixCodes takes the image as in/out.
 617 CV_EXPORTS_W void findDataMatrix(InputArray image,
 618                                  CV_OUT vector<string>& codes,
 619                                  OutputArray corners=noArray(),
 620                                  OutputArrayOfArrays dmtx=noArray());
 621 CV_EXPORTS_W void drawDataMatrixCodes(InputOutputArray image,
 622                                       const vector<string>& codes,
 623                                       InputArray corners);
 624 }
 625
 626 /****************************************************************************************\
 627 *                                Datamatrix                                              *
 628 \****************************************************************************************/
 629 // One Data Matrix code as returned by cvFindDataMatrix.
 630 struct CV_EXPORTS CvDataMatrixCode {
 631   char msg[4]; // fixed 4-byte buffer; NOTE(review): presumably not NUL-terminated - confirm before treating it as a C string
 632   CvMat *original; // raw pointer; NOTE(review): ownership is not visible here - confirm who releases it
 633   CvMat *corners; // raw pointer; same ownership caveat as original
 634 };
 635 // C++ linkage only (returns a std::deque by value): takes an image and returns the Data Matrix codes as CvDataMatrixCode entries.
 636 CV_EXPORTS std::deque<CvDataMatrixCode> cvFindDataMatrix(CvMat *im);
 637
 638 /****************************************************************************************\
 639 *                                 LINE-MOD                                               *
 640 \****************************************************************************************/
 641
 642 namespace cv {
 643 namespace linemod {
 644
 645 using cv::FileNode;
 646 using cv::FileStorage;
 647 using cv::Mat;
 648 using cv::noArray;
 649 using cv::OutputArrayOfArrays;
 650 using cv::Point;
 651 using cv::Ptr;
 652 using cv::Rect;
 653 using cv::Size;
 654
 655 /// @todo Convert doxy comments to rst
 656
 657 /**
 658  * \brief Discriminant feature described by its location and label.
 659  */
 660 struct CV_EXPORTS Feature
 661 {
 662   int x; ///< x offset
 663   int y; ///< y offset
 664   int label; ///< Quantization
 665
 666   Feature() : x(0), y(0), label(0) {} ///< Zero-initializes all three fields
 667   Feature(int x, int y, int label); ///< Defined inline right after the struct
 668
 669   void read(const FileNode& fn);
 670   void write(FileStorage& fs) const;
 671 };
 672 // Stores the three arguments into the members of the same name.
 673 inline Feature::Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {}
 674 // A template: its width, height and pyramid level plus the list of features it contains.
 675 struct CV_EXPORTS Template
 676 {
 677   int width;
 678   int height;
 679   int pyramid_level;
 680   std::vector<Feature> features;
 681
 682   void read(const FileNode& fn);
 683   void write(FileStorage& fs) const;
 684 };
 685
 686 /**
 687  * \brief Represents a modality operating over an image pyramid.
 688  */
 689 class QuantizedPyramid // NOTE(review): declared without CV_EXPORTS, unlike the neighbouring types - confirm this is intended
 690 {
 691 public:
 692   // Virtual destructor
 693   virtual ~QuantizedPyramid() {}
 694
 695   /**
 696    * \brief Compute quantized image at current pyramid level for online detection.
 697    *
 698    * \param[out] dst The destination 8-bit image. For each pixel at most one bit is set,
 699    *                 representing its classification.
 700    */
 701   virtual void quantize(Mat& dst) const =0;
 702
 703   /**
 704    * \brief Extract most discriminant features at current pyramid level to form a new template.
 705    *
 706    * \param[out] templ The new template.
 707    * \return bool; NOTE(review): presumably false when no template could be extracted - confirm.
 708    */
 709   virtual bool extractTemplate(Template& templ) const =0;
 710   /**
 711    * \brief Go to the next pyramid level.
 712    *
 713    * \todo Allow pyramid scale factor other than 2
 714    */
 715   virtual void pyrDown() =0;
 716
 717 protected:
 718   /// Candidate feature with a score
 719   struct Candidate
 720   {
 721     Candidate(int x, int y, int label, float score);
 722
 723     /// Sort candidates with high score to the front
 724     bool operator<(const Candidate& rhs) const
 725     {
 726       return score > rhs.score; // deliberately inverted: a higher score compares as "less"
 727     }
 728
 729     Feature f;
 730     float score;
 731   };
 732
 733   /**
 734    * \brief Choose candidate features so that they are not bunched together.
 735    *
 736    * \param[in]  candidates   Candidate features sorted by score.
 737    * \param[out] features     Destination vector of selected features.
 738    * \param[in]  num_features Number of candidates to select.
 739    * \param[in]  distance     Hint for desired distance between features.
 740    */
 741   static void selectScatteredFeatures(const std::vector<Candidate>& candidates,
 742                                       std::vector<Feature>& features,
 743                                       size_t num_features, float distance);
 744 };
 745 // Builds the embedded Feature from (x, y, label) and stores the score.
 746 inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _score) : f(x, y, label), score(_score) {}
 747
 748 /**
 749  * \brief Interface for modalities that plug into the LINE template matching representation.
 750  *
 751  * \todo Max response, to allow optimization of summing (255/MAX) features as uint8
 752  */
 753 class CV_EXPORTS Modality
 754 {
 755 public:
 756   // Virtual destructor
 757   virtual ~Modality() {}
 758
 759   /**
 760    * \brief Form a quantized image pyramid from a source image.
 761    *
 762    * \param[in] src  The source image. Type depends on the modality.
 763    * \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero
 764    *                 in quantized image and cannot be extracted as features.
 765    */
 766   Ptr<QuantizedPyramid> process(const Mat& src,
 767                     const Mat& mask = Mat()) const
 768   {
 769     return processImpl(src, mask); // non-virtual wrapper: forwards to the virtual implementation
 770   }
 771
 772   virtual std::string name() const =0;
 773
 774   virtual void read(const FileNode& fn) =0;
 775   virtual void write(FileStorage& fs) const =0;
 776
 777   /**
 778    * \brief Create modality by name.
 779    *
 780    * The following modality types are supported:
 781    * - "ColorGradient"
 782    * - "DepthNormal"
 783    */
 784   static Ptr<Modality> create(const std::string& modality_type);
 785
 786   /**
 787    * \brief Load a modality from file.
 788    */
 789   static Ptr<Modality> create(const FileNode& fn);
 790
 791 protected:
 792   // Indirection is because process() has a default parameter.
 793   virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
 794                         const Mat& mask) const =0;
 795 };
 796
 797 /**
 798  * \brief Modality that computes quantized gradient orientations from a color image.
 799  */
 800 class CV_EXPORTS ColorGradient : public Modality
 801 {
 802 public:
 803   /**
 804    * \brief Default constructor. Uses reasonable default parameter values.
 805    */
 806   ColorGradient();
 807
 808   /**
 809    * \brief Constructor.
 810    *
 811    * \param weak_threshold   When quantizing, discard gradients with magnitude less than this.
 812    * \param num_features     How many features a template must contain.
 813    * \param strong_threshold Consider as candidate features only gradients whose norms are
 814    *                         larger than this.
 815    */
 816   ColorGradient(float weak_threshold, size_t num_features, float strong_threshold);
 817
 818   virtual std::string name() const;
 819
 820   virtual void read(const FileNode& fn);
 821   virtual void write(FileStorage& fs) const;
 822
 823   float weak_threshold;
 824   size_t num_features;
 825   float strong_threshold;
 826
 827 protected:
 828   virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
 829                         const Mat& mask) const;
 830 };
 831
 832 /**
 833  * \brief Modality that computes quantized surface normals from a dense depth map.
 834  */
 835 class CV_EXPORTS DepthNormal : public Modality
 836 {
 837 public:
 838   /**
 839    * \brief Default constructor. Uses reasonable default parameter values.
 840    */
 841   DepthNormal();
 842
 843   /**
 844    * \brief Constructor.
 845    *
 846    * \param distance_threshold   Ignore pixels beyond this distance.
 847    * \param difference_threshold When computing normals, ignore contributions of pixels whose
 848    *                             depth difference with the central pixel is above this threshold.
 849    * \param num_features         How many features a template must contain.
 850    * \param extract_threshold    Consider as candidate feature only if there are no differing
 851    *                             orientations within a distance of extract_threshold.
 852    */
 853   DepthNormal(int distance_threshold, int difference_threshold, size_t num_features,
 854               int extract_threshold);
 855
 856   virtual std::string name() const;
 857
 858   virtual void read(const FileNode& fn);
 859   virtual void write(FileStorage& fs) const;
 860
 861   int distance_threshold;
 862   int difference_threshold;
 863   size_t num_features;
 864   int extract_threshold;
 865
 866 protected:
 867   virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
 868                         const Mat& mask) const;
 869 };
 870
 871 /**
 872  * \brief Debug function to colormap a quantized image for viewing.
 873  */
 874 void colormap(const Mat& quantized, Mat& dst);
 875
 876 /**
 877  * \brief Represents a successful template match.
 878  */
 879 struct CV_EXPORTS Match
 880 {
 881   Match()
 882   {
 883   }
 884
 885   Match(int x, int y, float similarity, const std::string& class_id, int template_id);
 886
 887   /// Sort matches with high similarity to the front
 888   bool operator<(const Match& rhs) const
 889   {
 890     // Secondarily sort on template_id for the sake of duplicate removal
 891     if (similarity != rhs.similarity)
 892       return similarity > rhs.similarity;
 893     else
 894       return template_id < rhs.template_id;
 895   }
 896
 897   bool operator==(const Match& rhs) const
 898   {
 899     return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id;
 900   }
 901
 902   int x;
 903   int y;
 904   float similarity;
 905   std::string class_id;
 906   int template_id;
 907 };
 908
 909 inline  Match::Match(int _x, int _y, float _similarity, const std::string& _class_id, int _template_id)
 910     : x(_x), y(_y), similarity(_similarity), class_id(_class_id), template_id(_template_id)
 911   {
 912   }
 913
 914 /**
 915  * \brief Object detector using the LINE template matching algorithm with any set of
 916  * modalities.
 917  */
 918 class CV_EXPORTS Detector
 919 {
 920 public:
 921   /**
 922    * \brief Empty constructor, initialize with read().
 923    */
 924   Detector();
 925
 926   /**
 927    * \brief Constructor.
 928    *
 929    * \param modalities       Modalities to use (color gradients, depth normals, ...).
 930    * \param T_pyramid        Value of the sampling step T at each pyramid level. The
 931    *                         number of pyramid levels is T_pyramid.size().
 932    */
 933   Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);
 934
 935   /**
 936    * \brief Detect objects by template matching.
 937    *
 938    * Matches globally at the lowest pyramid level, then refines locally stepping up the pyramid.
 939    *
 940    * \param      sources   Source images, one for each modality.
 941    * \param      threshold Similarity threshold, a percentage between 0 and 100.
 942    * \param[out] matches   Template matches, sorted by similarity score.
 943    * \param      class_ids If non-empty, only search for the desired object classes.
 944    * \param[out] quantized_images Optionally return vector<Mat> of quantized images.
 945    * \param      masks     The masks for consideration during matching. The masks should be CV_8UC1
 946    *                       where 255 represents a valid pixel.  If non-empty, the vector must be
 947    *                       the same size as sources.  Each element must be
 948    *                       empty or the same size as its corresponding source.
 949    */
 950   void match(const std::vector<Mat>& sources, float threshold, std::vector<Match>& matches,
 951              const std::vector<std::string>& class_ids = std::vector<std::string>(),
 952              OutputArrayOfArrays quantized_images = noArray(),
 953              const std::vector<Mat>& masks = std::vector<Mat>()) const;
 954
 955   /**
 956    * \brief Add new object template.
 957    *
 958    * \param      sources      Source images, one for each modality.
 959    * \param      class_id     Object class ID.
 960    * \param      object_mask  Mask separating object from background.
 961    * \param[out] bounding_box Optionally return bounding box of the extracted features.
 962    *
 963    * \return Template ID, or -1 if failed to extract a valid template.
 964    */
 965   int addTemplate(const std::vector<Mat>& sources, const std::string& class_id,
 966           const Mat& object_mask, Rect* bounding_box = NULL);
 967
 968   /**
 969    * \brief Add a new object template computed by external means.
 970    */
 971   int addSyntheticTemplate(const std::vector<Template>& templates, const std::string& class_id);
 972
 973   /**
 974    * \brief Get the modalities used by this detector.
 975    *
 976    * You are not permitted to add/remove modalities, but you may dynamic_cast them to
 977    * tweak parameters.
 978    */
 979   const std::vector< Ptr<Modality> >& getModalities() const { return modalities; }
 980
 981   /**
 982    * \brief Get sampling step T at pyramid_level.
 983    */
 984   int getT(int pyramid_level) const { return T_at_level[pyramid_level]; }
 985
 986   /**
 987    * \brief Get number of pyramid levels used by this detector.
 988    */
 989   int pyramidLevels() const { return pyramid_levels; }
 990
 991   /**
 992    * \brief Get the template pyramid identified by template_id.
 993    *
 994    * For example, with 2 modalities (Gradient, Normal) and two pyramid levels
 995    * (L0, L1), the order is (GradientL0, NormalL0, GradientL1, NormalL1).
 996    */
 997   const std::vector<Template>& getTemplates(const std::string& class_id, int template_id) const;
 998
 999   int numTemplates() const;
1000   int numTemplates(const std::string& class_id) const;
1001   int numClasses() const { return static_cast<int>(class_templates.size()); }
1002
1003   std::vector<std::string> classIds() const;
1004
1005   void read(const FileNode& fn);
1006   void write(FileStorage& fs) const;
1007
1008   std::string readClass(const FileNode& fn, const std::string &class_id_override = "");
1009   void writeClass(const std::string& class_id, FileStorage& fs) const;
1010
1011   void readClasses(const std::vector<std::string>& class_ids,
1012                    const std::string& format = "templates_%s.yml.gz");
1013   void writeClasses(const std::string& format = "templates_%s.yml.gz") const;
1014
1015 protected:
1016   std::vector< Ptr<Modality> > modalities;
1017   int pyramid_levels;
1018   std::vector<int> T_at_level;
1019
1020   typedef std::vector<Template> TemplatePyramid;
1021   typedef std::map<std::string, std::vector<TemplatePyramid> > TemplatesMap;
1022   TemplatesMap class_templates;
1023
1024   typedef std::vector<Mat> LinearMemories;
1025   // Indexed as [pyramid level][modality][quantized label]
1026   typedef std::vector< std::vector<LinearMemories> > LinearMemoryPyramid;
1027
1028   void matchClass(const LinearMemoryPyramid& lm_pyramid,
1029                   const std::vector<Size>& sizes,
1030                   float threshold, std::vector<Match>& matches,
1031                   const std::string& class_id,
1032                   const std::vector<TemplatePyramid>& template_pyramids) const;
1033 };
1034
/**
 * \brief Factory function for detector using LINE algorithm with color gradients.
 *
 * Default parameter settings suitable for VGA images.
 *
 * \return Ptr to the resulting Detector.
 */
CV_EXPORTS Ptr<Detector> getDefaultLINE();
1041
/**
 * \brief Factory function for detector using LINE-MOD algorithm with color gradients
 * and depth normals.
 *
 * Default parameter settings suitable for VGA images.
 *
 * \return Ptr to the resulting Detector.
 */
CV_EXPORTS Ptr<Detector> getDefaultLINEMOD();
1049
1050 } // namespace linemod
1051 } // namespace cv
1052
1053 #endif
1054
1055 #endif