1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #ifndef __OPENCV_CONTRIB_HPP__
44 #define __OPENCV_CONTRIB_HPP__
46 #include "opencv2/core.hpp"
47 #include "opencv2/imgproc.hpp"
48 #include "opencv2/features2d.hpp"
49 #include "opencv2/objdetect.hpp"
// Octree: hierarchical spatial subdivision of a 3-D point set, used for fast
// "points within a sphere" radius queries.
// NOTE(review): this span is elided — braces, access specifiers and the Node
// struct header are missing from the visible text.
53 class CV_EXPORTS Octree
// Zero-initializes the whole node. NOTE(review): memset(this, ...) is valid
// only while Node stays trivially copyable — confirm before adding members.
58 Node() { memset(this, 0, sizeof(Node)); }
// Axis-aligned bounding box of this octree cell.
60 float x_min, x_max, y_min, y_max, z_min, z_max;
// Builds the tree from points; maxLevels / minPoints bound the subdivision
// (presumably max recursion depth and min points per cell — TODO confirm).
67 Octree( const std::vector<Point3f>& points, int maxLevels = 10, int minPoints = 20 );
70 virtual void buildTree( const std::vector<Point3f>& points, int maxLevels = 10, int minPoints = 20 );
// Collects stored points within radius of center into the output vector.
71 virtual void getPointsWithinSphere( const Point3f& center, float radius,
72 std::vector<Point3f>& points ) const;
// Read-only access to the flat node array.
73 const std::vector<Node>& getNodes() const { return nodes; }
76 std::vector<Point3f> points;
77 std::vector<Node> nodes;
// Performs one subdivision step on the node at index node_ind.
79 virtual void buildNext(size_t node_ind);
// Mesh3D: a vertex cloud with optional per-vertex normals; supports resolution
// estimation, normal computation and VRML export.
// NOTE(review): span is elided — braces and access specifiers are missing.
83 class CV_EXPORTS Mesh3D
// Exception type signalling an operation attempted on an empty mesh.
86 struct EmptyMeshException {};
89 Mesh3D(const std::vector<Point3f>& vtx);
// Estimates a characteristic point-cloud resolution; tryRatio presumably
// selects the fraction of vertices sampled — TODO confirm against impl.
94 float estimateResolution(float tryRatio = 0.1f);
// Computes per-vertex normals from neighbors within normalRadius; the subset
// overload restricts the computation to the given vertex indices.
95 void computeNormals(float normalRadius, int minNeighbors = 20);
96 void computeNormals(const std::vector<int>& subset, float normalRadius, int minNeighbors = 20);
// Writes the mesh (optionally with per-vertex colors) to a VRML file.
98 void writeAsVrml(const String& file, const std::vector<Scalar>& colors = std::vector<Scalar>()) const;
100 std::vector<Point3f> vtx;
101 std::vector<Point3f> normals;
105 const static Point3f allzero;
// SpinImageModel: spin-image based 3-D surface matching over a Mesh3D;
// supports subset selection, model/scene correspondence matching and
// visualization of the computed spin images.
// NOTE(review): span is elided (braces/members missing). The misspelled
// identifiers below are part of the published API — do not rename them.
108 class CV_EXPORTS SpinImageModel
112 /* model parameters, leave unset for default or auto estimate */
// Thresholds for geometric consistency and correspondence grouping
// (historical misspellings kept for source/ABI compatibility).
122 float T_GeometriccConsistency;
123 float T_GroupingCorespondances;
125 /* public interface */
127 explicit SpinImageModel(const Mesh3D& mesh);
// Restrict spin-image computation to a random fraction of vertices, or to an
// explicit list of vertex indices.
130 void selectRandomSubset(float ratio);
131 void setSubset(const std::vector<int>& subset);
// Matches this model against a scene; result holds groups of Vec2i
// correspondences — presumably (model index, scene index) pairs; confirm.
134 void match(const SpinImageModel& scene, std::vector< std::vector<Vec2i> >& result);
// Packs a random sample of spin images into a single Mat for visualization.
136 Mat packRandomScaledSpins(bool separateScale = false, size_t xCount = 10, size_t yCount = 10) const;
// Accessors: one spin image per row of spinImages, indexed in parallel with
// the subset vector (getSpinVertex/getSpinNormal map through subset).
138 size_t getSpinCount() const { return spinImages.rows; }
139 Mat getSpinImage(size_t index) const { return spinImages.row((int)index); }
140 const Point3f& getSpinVertex(size_t index) const { return mesh.vtx[subset[index]]; }
141 const Point3f& getSpinNormal(size_t index) const { return mesh.normals[subset[index]]; }
143 const Mesh3D& getMesh() const { return mesh; }
144 Mesh3D& getMesh() { return mesh; }
146 /* static utility functions */
// Correlation of two spin images; result is an out-param, the bool
// presumably flags whether the correlation was computable — confirm.
147 static bool spinCorrelation(const Mat& spin1, const Mat& spin2, float lambda, float& result);
// Projects point into the 2-D spin-map coordinates of the oriented vertex
// (vertex, normal).
149 static Point2f calcSpinMapCoo(const Point3f& point, const Point3f& vertex, const Point3f& normal);
// Pairwise consistency/grouping measures over two scene-model point pairs.
151 static float geometricConsistency(const Point3f& pointScene1, const Point3f& normalScene1,
152 const Point3f& pointModel1, const Point3f& normalModel1,
153 const Point3f& pointScene2, const Point3f& normalScene2,
154 const Point3f& pointModel2, const Point3f& normalModel2);
156 static float groupingCreteria(const Point3f& pointScene1, const Point3f& normalScene1,
157 const Point3f& pointModel1, const Point3f& normalModel1,
158 const Point3f& pointScene2, const Point3f& normalScene2,
159 const Point3f& pointModel2, const Point3f& normalModel2,
// Resets all model parameters to their default values.
162 void defaultParams();
// Finds model spins correlated with spin; indeces/corrCoeffs are parallel
// output vectors (spelling kept for compatibility).
164 void matchSpinToModel(const Mat& spin, std::vector<int>& indeces,
165 std::vector<float>& corrCoeffs, bool useExtremeOutliers = true) const;
// Compacts the rows of spinImages according to mask.
167 void repackSpinImages(const std::vector<uchar>& mask, Mat& spinImages, bool reAlloc = true) const;
169 std::vector<int> subset;
// TickMeter: simple stopwatch accumulating elapsed time across measurements.
// NOTE(review): span is elided — the start/stop/reset members and counters
// are not visible here.
174 class CV_EXPORTS TickMeter
// Accumulated time, in ticks / microseconds / milliseconds / seconds.
181 int64 getTimeTicks() const;
182 double getTimeMicro() const;
183 double getTimeMilli() const;
184 double getTimeSec() const;
// Presumably the number of completed measurements — TODO confirm.
185 int64 getCounter() const;
194 //CV_EXPORTS std::ostream& operator<<(std::ostream& out, const TickMeter& tm);
// SelfSimDescriptor: local self-similarity descriptor pooled over log-polar
// bins (numberOfDistanceBuckets distance rings x nangles angular sectors).
// NOTE(review): span is elided — braces, access specifiers and the small/large
// window size members are missing from view.
196 class CV_EXPORTS SelfSimDescriptor
// _ssize/_lsize presumably are the small patch and large window sizes
// (cf. DEFAULT_SMALL_SIZE / DEFAULT_LARGE_SIZE) — TODO confirm.
200 SelfSimDescriptor(int _ssize, int _lsize,
201 int _startDistanceBucket=DEFAULT_START_DISTANCE_BUCKET,
202 int _numberOfDistanceBuckets=DEFAULT_NUM_DISTANCE_BUCKETS,
203 int _nangles=DEFAULT_NUM_ANGLES);
204 SelfSimDescriptor(const SelfSimDescriptor& ss);
205 virtual ~SelfSimDescriptor();
206 SelfSimDescriptor& operator = (const SelfSimDescriptor& ss);
// Length of a single descriptor vector.
208 size_t getDescriptorSize() const;
// Grid of descriptor positions for an image of imgsize stepped by winStride.
209 Size getGridSize( Size imgsize, Size winStride ) const;
// Computes descriptors over a grid of img, or only at the given locations.
211 virtual void compute(const Mat& img, std::vector<float>& descriptors, Size winStride=Size(),
212 const std::vector<Point>& locations=std::vector<Point>()) const;
// Builds the log-polar bin-index mask used to pool the SSD surface.
213 virtual void computeLogPolarMapping(Mat& mappingMask) const;
// Sum-of-squared-differences surface of the patch at pt vs. its surround.
214 virtual void SSD(const Mat& img, Point pt, Mat& ssd) const;
218 int startDistanceBucket;
219 int numberOfDistanceBuckets;
// Default descriptor geometry.
222 enum { DEFAULT_SMALL_SIZE = 5, DEFAULT_LARGE_SIZE = 41,
223 DEFAULT_NUM_ANGLES = 20, DEFAULT_START_DISTANCE_BUCKET = 3,
224 DEFAULT_NUM_DISTANCE_BUCKETS = 7 };
// Chamfer matching of an edge template against an image over a range of
// scales (minScale..maxScale in 'scales' steps). Matched contours go to
// results with per-match costs in cost; maxMatches caps the output and
// minMatchDistance presumably suppresses near-duplicate matches — confirm.
// The int return value's meaning is defined in the implementation, which is
// not visible here. ("chamer" spelling is part of the published API.)
228 CV_EXPORTS_W int chamerMatching( Mat& img, Mat& templ,
229 CV_OUT std::vector<std::vector<Point> >& results, CV_OUT std::vector<float>& cost,
230 double templScale=1, int maxMatches = 20,
231 double minMatchDistance = 1.0, int padX = 3,
232 int padY = 3, int scales = 5, double minScale = 0.6, double maxScale = 1.6,
233 double orientationWeight = 0.5, double truncate = 20);
// StereoVar: variational stereo correspondence solver producing a disparity
// map from a rectified image pair, via a multigrid (FMG/V-cycle) scheme.
// NOTE(review): span is elided — braces, the default constructor and some
// members (nIt, fi) are missing from the visible text.
236 class CV_EXPORTS_W StereoVar
// Behavior flags (bit mask), multigrid cycle types, penalization models.
240 enum {USE_INITIAL_DISPARITY = 1, USE_EQUALIZE_HIST = 2, USE_SMART_ID = 4, USE_AUTO_PARAMS = 8, USE_MEDIAN_FILTERING = 16};
241 enum {CYCLE_O, CYCLE_V};
242 enum {PENALIZATION_TICHONOV, PENALIZATION_CHARBONNIER, PENALIZATION_PERONA_MALIK};
244 //! the default constructor
247 //! the full constructor taking all the necessary algorithm parameters
248 CV_WRAP StereoVar(int levels, double pyrScale, int nIt, int minDisp, int maxDisp, int poly_n, double poly_sigma, float fi, float lambda, int penalization, int cycle, int flags);
251 virtual ~StereoVar();
253 //! the stereo correspondence operator that computes disparity map for the specified rectified stereo pair
254 CV_WRAP_AS(compute) virtual void operator()(const Mat& left, const Mat& right, CV_OUT Mat& disp);
// Pyramid and solver parameters, exposed to language bindings (CV_PROP_RW).
256 CV_PROP_RW int levels;
257 CV_PROP_RW double pyrScale;
259 CV_PROP_RW int minDisp;
260 CV_PROP_RW int maxDisp;
261 CV_PROP_RW int poly_n;
262 CV_PROP_RW double poly_sigma;
264 CV_PROP_RW float lambda;
265 CV_PROP_RW int penalization;
266 CV_PROP_RW int cycle;
267 CV_PROP_RW int flags;
// Internal solver stages: full multigrid driver, FAS V-cycle, and the
// per-level variational relaxation step.
271 void FMG(Mat &I1, Mat &I2, Mat &I2x, Mat &u, int level);
272 void VCycle_MyFAS(Mat &I1_h, Mat &I2_h, Mat &I2x_h, Mat &u_h, int level);
273 void VariationalSolver(Mat &I1_h, Mat &I2_h, Mat &I2x_h, Mat &u_h, int level);
// Least-squares polynomial fit of the given order: dst receives the
// coefficients fitting srcy as a polynomial in srcx — coefficient ordering
// is defined by the implementation, not visible here; TODO confirm.
276 CV_EXPORTS void polyfit(const Mat& srcx, const Mat& srcy, Mat& dst, int order);
// Directory: static helpers listing files/folders under path filtered by the
// exten pattern ("*" = everything). GetListFilesR presumably lists files
// recursively — confirm. addPath controls whether returned entries are
// prefixed with path.
// NOTE(review): span is elided — braces/access specifiers are missing.
278 class CV_EXPORTS Directory
281 static std::vector<String> GetListFiles ( const String& path, const String & exten = "*", bool addPath = true );
282 static std::vector<String> GetListFilesR ( const String& path, const String & exten = "*", bool addPath = true );
283 static std::vector<String> GetListFolders( const String& path, const String & exten = "*", bool addPath = true );
287 * Generation of a set of visually distinct colors by the following way:
288 * 1) generate more colors than needed ("factor" times as many) in RGB,
289 * 2) convert them to Lab,
290 * 3) choose the needed count of colors from the set that are more different from
292 * 4) convert the colors back to RGB
294 CV_EXPORTS void generateColors( std::vector<Scalar>& colors, size_t count, size_t factor=100 );
298 * Estimate the rigid body motion from frame0 to frame1. The method is based on the paper
299 * "Real-Time Visual Odometry from Dense RGB-D Images", F. Steinbruecker, J. Sturm, D. Cremers, ICCV, 2011.
// Transformation type selector (the enum header and other enumerators are
// elided from this view; only RIGID_BODY_MOTION is visible).
303 RIGID_BODY_MOTION = 4
// Estimates the transform Rt aligning frame0 to frame1 from RGB + depth
// pairs (optionally seeded by initRt and restricted by masks); returns true
// on success. minDepth/maxDepth/maxDepthDiff presumably are in meters, and
// iterCounts/minGradientMagnitudes configure the per-pyramid-level solve —
// TODO confirm against the implementation.
305 CV_EXPORTS bool RGBDOdometry( Mat& Rt, const Mat& initRt,
306 const Mat& image0, const Mat& depth0, const Mat& mask0,
307 const Mat& image1, const Mat& depth1, const Mat& mask1,
308 const Mat& cameraMatrix, float minDepth=0.f, float maxDepth=4.f, float maxDepthDiff=0.07f,
309 const std::vector<int>& iterCounts=std::vector<int>(),
310 const std::vector<float>& minGradientMagnitudes=std::vector<float>(),
311 int transformType=RIGID_BODY_MOTION );
314 *Bilinear interpolation technique.
316 *The value of a desired cortical pixel is obtained through a bilinear interpolation of the values
317 *of the four nearest neighbouring Cartesian pixels to the center of the RF.
318 *The same principle is applied to the inverse transformation.
320 *More details can be found in http://dx.doi.org/10.1007/978-3-642-23968-7_5
// LogPolar_Interp: log-polar image transform (Cartesian <-> cortical) using
// bilinear interpolation. NOTE(review): span is elided — braces, access
// specifiers and several members are missing from the visible text.
322 class CV_EXPORTS LogPolar_Interp
330 *\param w the width of the input image
331 *\param h the height of the input image
332 *\param center the transformation center: where the output precision is maximal
333 *\param R the number of rings of the cortical image (default value 70 pixel)
334 *\param ro0 the radius of the blind spot (default value 3 pixel)
335 *\param full \a 1 (default value) means that the retinal image (the inverse transform) is computed within the circumscribing circle.
336 * \a 0 means that the retinal image is computed within the inscribed circle.
337 *\param S the number of sectors of the cortical image (default value 70 pixel).
338 * Its value is usually internally computed to obtain a pixel aspect ratio equal to 1.
339 *\param sp \a 1 (default value) means that the parameter \a S is internally computed.
340 * \a 0 means that the parameter \a S is provided by the user.
342 LogPolar_Interp(int w, int h, Point2i center, int R=70, double ro0=3.0,
343 int interp=INTER_LINEAR, int full=1, int S=117, int sp=1);
345 *Transformation from Cartesian image to cortical (log-polar) image.
346 *\param source the Cartesian image
347 *\return the transformed image (cortical image)
// NOTE(review): top-level const on a by-value Mat return prevents moving
// from the result; kept as-is for interface compatibility.
349 const Mat to_cortical(const Mat &source);
351 *Transformation from cortical image to retinal (inverse log-polar) image.
352 *\param source the cortical image
353 *\return the transformed image (retinal image)
355 const Mat to_cartesian(const Mat &source);
// Border margins and log-polar mapping parameters (romax, base a, factor q).
367 int top, bottom,left,right;
368 double ro0, romax, a, q;
// Precomputes the Cartesian<->cortical lookup maps for an MxN image with
// R rings, S sectors and blind-spot radius ro0.
374 void create_map(int M, int N, int R, int S, double ro0);
378 *Overlapping circular receptive fields technique
380 *The Cartesian plane is divided in two regions: the fovea and the periphery.
381 *The fovea (oversampling) is handled by using the bilinear interpolation technique described above, whereas in
382 *the periphery we use the overlapping Gaussian circular RFs.
384 *More details can be found in http://dx.doi.org/10.1007/978-3-642-23968-7_5
// LogPolar_Overlapping: log-polar transform using overlapping Gaussian
// receptive fields in the periphery and bilinear interpolation in the fovea.
// NOTE(review): span is elided — braces, access specifiers and some members
// (including the kernel struct definition) are missing from view.
386 class CV_EXPORTS LogPolar_Overlapping
389 LogPolar_Overlapping() {}
393 *\param w the width of the input image
394 *\param h the height of the input image
395 *\param center the transformation center: where the output precision is maximal
396 *\param R the number of rings of the cortical image (default value 70 pixel)
397 *\param ro0 the radius of the blind spot (default value 3 pixel)
398 *\param full \a 1 (default value) means that the retinal image (the inverse transform) is computed within the circumscribing circle.
399 * \a 0 means that the retinal image is computed within the inscribed circle.
400 *\param S the number of sectors of the cortical image (default value 70 pixel).
401 * Its value is usually internally computed to obtain a pixel aspect ratio equal to 1.
402 *\param sp \a 1 (default value) means that the parameter \a S is internally computed.
403 * \a 0 means that the parameter \a S is provided by the user.
405 LogPolar_Overlapping(int w, int h, Point2i center, int R=70,
406 double ro0=3.0, int full=1, int S=117, int sp=1);
408 *Transformation from Cartesian image to cortical (log-polar) image.
409 *\param source the Cartesian image
410 *\return the transformed image (cortical image)
412 const Mat to_cortical(const Mat &source);
414 *Transformation from cortical image to retinal (inverse log-polar) image.
415 *\param source the cortical image
416 *\return the transformed image (retinal image)
418 const Mat to_cartesian(const Mat &source);
422 ~LogPolar_Overlapping();
// Per-receptive-field row/column indices and weights.
428 std::vector<int> Rsr;
429 std::vector<int> Csr;
430 std::vector<double> Wsr;
// Mapping dimensions; ind1 presumably marks the fovea/periphery boundary
// ring — TODO confirm.
432 int S, R, M, N, ind1;
433 int top, bottom,left,right;
434 double ro0, romax, a, q;
439 std::vector<double> weights;
// Precomputed 2-D Gaussian receptive-field kernels, one per field.
445 std::vector<kernel> w_ker_2D;
// Precomputes the mapping for an MxN image with R rings and S sectors.
447 void create_map(int M, int N, int R, int S, double ro0);
451 * Adjacent receptive fields technique
453 *All the Cartesian pixels, whose coordinates in the cortical domain share the same integer part, are assigned to the same RF.
454 *The precision of the boundaries of the RF can be improved by breaking each pixel into subpixels and assigning each of them to the correct RF.
455 *This technique is implemented from: Traver, V., Pla, F.: Log-polar mapping template design: From task-level requirements
456 *to geometry parameters. Image Vision Comput. 26(10) (2008) 1354-1370
458 *More details can be found in http://dx.doi.org/10.1007/978-3-642-23968-7_5
// LogPolar_Adjacent: log-polar transform assigning each Cartesian (sub)pixel
// to the receptive field whose cortical coordinates share its integer part.
// NOTE(review): span is elided — braces, access specifiers and the pixel
// struct header are missing from the visible text.
460 class CV_EXPORTS LogPolar_Adjacent
463 LogPolar_Adjacent() {}
467 *\param w the width of the input image
468 *\param h the height of the input image
469 *\param center the transformation center: where the output precision is maximal
470 *\param R the number of rings of the cortical image (default value 70 pixel)
471 *\param ro0 the radius of the blind spot (default value 3 pixel)
472 *\param smin the size of the subpixel (default value 0.25 pixel)
473 *\param full \a 1 (default value) means that the retinal image (the inverse transform) is computed within the circumscribing circle.
474 * \a 0 means that the retinal image is computed within the inscribed circle.
475 *\param S the number of sectors of the cortical image (default value 70 pixel).
476 * Its value is usually internally computed to obtain a pixel aspect ratio equal to 1.
477 *\param sp \a 1 (default value) means that the parameter \a S is internally computed.
478 * \a 0 means that the parameter \a S is provided by the user.
480 LogPolar_Adjacent(int w, int h, Point2i center, int R=70, double ro0=3.0, double smin=0.25, int full=1, int S=117, int sp=1);
482 *Transformation from Cartesian image to cortical (log-polar) image.
483 *\param source the Cartesian image
484 *\return the transformed image (cortical image)
486 const Mat to_cortical(const Mat &source);
488 *Transformation from cortical image to retinal (inverse log-polar) image.
489 *\param source the cortical image
490 *\return the transformed image (retinal image)
492 const Mat to_cartesian(const Mat &source);
496 ~LogPolar_Adjacent();
// pixel: a cortical cell index (u, v) with an accumulated area weight a.
501 pixel() { u = v = 0; a = 0.; }
503 int top, bottom,left,right;
508 double ro0, romax, a, q;
// L: per-Cartesian-pixel list of contributing cortical cells; A: presumably
// per-cell normalization areas — TODO confirm.
509 std::vector<std::vector<pixel> > L;
510 std::vector<double> A;
// Recursively splits pixel (i, j) into subpixels of side >= smin and
// assigns each to its receptive field.
512 void subdivide_recursively(double x, double y, int i, int j, double length, double smin);
// Maps Cartesian (x, y) to cortical cell (u, v); returns false when the
// point falls outside the mapped region — presumably; confirm.
513 bool get_uv(double x, double y, int&u, int&v);
// Precomputes the mapping for an MxN image.
514 void create_map(int M, int N, int R, int S, double ro0, double smin);
// Projects src samples into the subspace spanned by W (centered on mean),
// and the inverse reconstruction back to the original space. Used by the
// LDA / face-recognition code below.
517 CV_EXPORTS Mat subspaceProject(InputArray W, InputArray mean, InputArray src);
518 CV_EXPORTS Mat subspaceReconstruct(InputArray W, InputArray mean, InputArray src);
// NOTE(review): the class LDA header line and member declarations
// (_num_components, _eigenvectors, _eigenvalues) are elided from this view.
523 // Initializes a LDA with num_components (default 0). NOTE(review): an older
524 // remark here mentioned a dataAsRow parameter; no such parameter exists in
// this signature — the remark was stale and has been removed.
525 LDA(int num_components = 0) :
526 _num_components(num_components) { }
528 // Initializes and performs a Discriminant Analysis with Fisher's
529 // Optimization Criterion on given data in src and corresponding labels
530 // in labels. If 0 (or fewer) components are requested, the number is
531 // determined automatically from the data.
532 LDA(InputArrayOfArrays src, InputArray labels,
533 int num_components = 0) :
534 _num_components(num_components)
536 this->compute(src, labels); //! compute eigenvectors and eigenvalues
539 // Serializes this object to a given filename.
540 void save(const String& filename) const;
542 // Deserializes this object from a given filename.
543 void load(const String& filename);
545 // Serializes this object to a given cv::FileStorage.
546 void save(FileStorage& fs) const;
548 // Deserializes this object from a given cv::FileStorage.
549 void load(const FileStorage& node);
554 //! Compute the discriminants for data in src and labels.
555 void compute(InputArrayOfArrays src, InputArray labels);
557 // Projects samples into the LDA subspace.
558 Mat project(InputArray src);
560 // Reconstructs projections from the LDA subspace.
561 Mat reconstruct(InputArray src);
563 // Returns the eigenvectors of this LDA.
564 Mat eigenvectors() const { return _eigenvectors; }
566 // Returns the eigenvalues of this LDA.
567 Mat eigenvalues() const { return _eigenvalues; }
// Internal implementation of the discriminant computation.
575 void lda(InputArrayOfArrays src, InputArray labels);
// FaceRecognizer: abstract base for face-recognition models (see the
// createEigen/Fisher/LBPH factories below). Implementations supply
// train/predict and FileStorage (de)serialization.
// NOTE(review): span is elided — braces and access specifiers are missing.
578 class CV_EXPORTS_W FaceRecognizer : public Algorithm
581 //! virtual destructor
582 virtual ~FaceRecognizer() {}
584 // Trains a FaceRecognizer.
585 CV_WRAP virtual void train(InputArrayOfArrays src, InputArray labels) = 0;
587 // Updates a FaceRecognizer.
588 CV_WRAP virtual void update(InputArrayOfArrays src, InputArray labels);
590 // Gets a prediction from a FaceRecognizer.
591 virtual int predict(InputArray src) const = 0;
593 // Predicts the label and confidence for a given sample.
594 CV_WRAP virtual void predict(InputArray src, CV_OUT int &label, CV_OUT double &confidence) const = 0;
596 // Serializes this object to a given filename.
597 CV_WRAP virtual void save(const String& filename) const;
599 // Deserializes this object from a given filename.
600 CV_WRAP virtual void load(const String& filename);
602 // Serializes this object to a given cv::FileStorage.
603 virtual void save(FileStorage& fs) const = 0;
605 // Deserializes this object from a given cv::FileStorage.
606 virtual void load(const FileStorage& fs) = 0;
// Factories for the concrete FaceRecognizer implementations. threshold is
// the distance above which predict() reports "unknown" (DBL_MAX = never).
// For LBPH, radius/neighbors parameterize the local binary pattern operator
// and grid_x/grid_y the spatial histogram grid — presumably; confirm against
// the module documentation.
610 CV_EXPORTS_W Ptr<FaceRecognizer> createEigenFaceRecognizer(int num_components = 0, double threshold = DBL_MAX);
611 CV_EXPORTS_W Ptr<FaceRecognizer> createFisherFaceRecognizer(int num_components = 0, double threshold = DBL_MAX);
612 CV_EXPORTS_W Ptr<FaceRecognizer> createLBPHFaceRecognizer(int radius=1, int neighbors=8,
613 int grid_x=8, int grid_y=8, double threshold = DBL_MAX);
// Colormap identifiers (the enum header and the other enumerators are elided
// from this view; only COLORMAP_RAINBOW is visible).
621 COLORMAP_RAINBOW = 4,
// Applies the selected colormap to src, writing the colored image to dst.
631 CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, int colormap);
// Registers the contrib module's algorithms with the cv::Algorithm machinery;
// presumably returns true on successful registration — confirm in the
// module's init source.
633 CV_EXPORTS bool initModule_contrib();
636 #include "opencv2/contrib/openfabmap.hpp"