1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #ifndef __OPENCV_ML_HPP__
44 #define __OPENCV_ML_HPP__
47 # include "opencv2/core.hpp"
// Parameter grid described by three doubles: minVal, maxVal and logStep.
// NOTE(review): presumably the search grid consumed by SVM::trainAuto, whose
// ParamGrid arguments default to SVM::getDefaultGrid(...) - confirm in the implementation.
82 class CV_EXPORTS_W_MAP ParamGrid
// Builds a grid from explicit bounds and a step; the definition is not in this header.
86 ParamGrid(double _minVal, double _maxVal, double _logStep);
// Public fields, marked CV_PROP_RW (presumably read/write properties for generated bindings).
88 CV_PROP_RW double minVal;
89 CV_PROP_RW double maxVal;
90 CV_PROP_RW double logStep;
92 #define CV_TYPE_NAME_ML_LR "opencv-ml-lr"
// Abstract training-data interface. Apart from the destructor and the static
// helpers, every member visible here is pure virtual, so objects are obtained
// through the static factories create() and loadFromCSV(), which return Ptr<TrainData>.
96 class CV_EXPORTS TrainData
// Returns FLT_MAX; going by its name, this is the marker used for missing values.
99 static inline float missingValue() { return FLT_MAX; }
100 virtual ~TrainData();
// --- Layout and size queries ---
102 virtual int getLayout() const = 0;
103 virtual int getNTrainSamples() const = 0;
104 virtual int getNTestSamples() const = 0;
105 virtual int getNSamples() const = 0;
106 virtual int getNVars() const = 0;
107 virtual int getNAllVars() const = 0;
// --- Sample access ---
// getSample writes into a caller-supplied float buffer.
// NOTE(review): the required size of buf is not stated here - confirm against the implementation.
109 virtual void getSample(InputArray varIdx, int sidx, float* buf) const = 0;
110 virtual Mat getSamples() const = 0;
111 virtual Mat getMissing() const = 0;
// Defaults: layout=ROW_SAMPLE, compressSamples=true, compressVars=true.
112 virtual Mat getTrainSamples(int layout=ROW_SAMPLE,
113 bool compressSamples=true,
114 bool compressVars=true) const = 0;
// --- Responses (all / train / test, plus "NormCat" variants) ---
115 virtual Mat getTrainResponses() const = 0;
116 virtual Mat getTrainNormCatResponses() const = 0;
117 virtual Mat getTestResponses() const = 0;
118 virtual Mat getTestNormCatResponses() const = 0;
119 virtual Mat getResponses() const = 0;
120 virtual Mat getNormCatResponses() const = 0;
// --- Sample weights ---
121 virtual Mat getSampleWeights() const = 0;
122 virtual Mat getTrainSampleWeights() const = 0;
123 virtual Mat getTestSampleWeights() const = 0;
// --- Variable metadata and sample index sets ---
124 virtual Mat getVarIdx() const = 0;
125 virtual Mat getVarType() const = 0;
126 virtual int getResponseType() const = 0;
127 virtual Mat getTrainSampleIdx() const = 0;
128 virtual Mat getTestSampleIdx() const = 0;
// Per-variable value extraction into caller-supplied buffers (float or int).
129 virtual void getValues(int vi, InputArray sidx, float* values) const = 0;
130 virtual void getNormCatValues(int vi, InputArray sidx, int* values) const = 0;
131 virtual Mat getDefaultSubstValues() const = 0;
// --- Categorical-variable information ---
133 virtual int getCatCount(int vi) const = 0;
134 virtual Mat getClassLabels() const = 0;
136 virtual Mat getCatOfs() const = 0;
137 virtual Mat getCatMap() const = 0;
// --- Train/test partitioning; both setters take shuffle=true by default ---
139 virtual void setTrainTestSplit(int count, bool shuffle=true) = 0;
140 virtual void setTrainTestSplitRatio(double ratio, bool shuffle=true) = 0;
141 virtual void shuffleTrainTest() = 0;
// --- Static helpers and factories ---
143 static Mat getSubVector(const Mat& vec, const Mat& idx);
// NOTE(review): only part of the loadFromCSV parameter list is visible in this excerpt;
// the visible defaults are responseStartIdx=-1, responseEndIdx=-1 and an empty varTypeSpec.
144 static Ptr<TrainData> loadFromCSV(const String& filename,
146 int responseStartIdx=-1,
147 int responseEndIdx=-1,
148 const String& varTypeSpec=String(),
// Builds a TrainData from in-memory arrays; varIdx, sampleIdx, sampleWeights and
// varType all default to noArray().
151 static Ptr<TrainData> create(InputArray samples, int layout, InputArray responses,
152 InputArray varIdx=noArray(), InputArray sampleIdx=noArray(),
153 InputArray sampleWeights=noArray(), InputArray varType=noArray());
// Base class, derived from Algorithm, that the model classes in this header inherit from.
157 class CV_EXPORTS_W StatModel : public Algorithm
// Flag constants. UPDATE_MODEL and RAW_OUTPUT share the value 1.
// NOTE(review): presumably one applies to train() and the other to predict() - confirm.
160 enum { UPDATE_MODEL = 1, RAW_OUTPUT=1, COMPRESSED_INPUT=2, PREPROCESSED_INPUT=4 };
161 virtual void clear();
163 virtual int getVarCount() const = 0;
165 virtual bool isTrained() const = 0;
166 virtual bool isClassifier() const = 0;
// Training entry points: from a TrainData object, or directly from arrays.
168 virtual bool train( const Ptr<TrainData>& trainData, int flags=0 );
169 virtual bool train( InputArray samples, int layout, InputArray responses );
// test selects which part of data is evaluated; resp receives output.
// NOTE(review): exact error metric is defined in the implementation, not here.
170 virtual float calcError( const Ptr<TrainData>& data, bool test, OutputArray resp ) const;
171 virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;
// Loads a model of type _Tp from a file: creates an instance with _Tp::create(),
// reads it from the first top-level node of the storage, and returns an empty
// Ptr when the resulting model does not report isTrained().
// NOTE(review): fs is not checked for having opened successfully before it is read.
173 template<typename _Tp> static Ptr<_Tp> load(const String& filename)
175 FileStorage fs(filename, FileStorage::READ);
176 Ptr<_Tp> model = _Tp::create();
177 model->read(fs.getFirstTopLevelNode());
178 return model->isTrained() ? model : Ptr<_Tp>();
// Creates a _Tp with parameters p and trains it on data; returns an empty Ptr
// when the created pointer is empty or train() returns false.
181 template<typename _Tp> static Ptr<_Tp> train(const Ptr<TrainData>& data, const typename _Tp::Params& p, int flags=0)
183 Ptr<_Tp> model = _Tp::create(p);
184 return !model.empty() && model->train(data, flags) ? model : Ptr<_Tp>();
// Array-based variant: wraps (samples, layout, responses) with TrainData::create
// and then follows the same create-then-train logic.
187 template<typename _Tp> static Ptr<_Tp> train(InputArray samples, int layout, InputArray responses,
188 const typename _Tp::Params& p, int flags=0)
190 Ptr<_Tp> model = _Tp::create(p);
191 return !model.empty() && model->train(TrainData::create(samples, layout, responses), flags) ? model : Ptr<_Tp>();
194 virtual void save(const String& filename) const;
195 virtual String getDefaultModelName() const = 0;
198 /****************************************************************************************\
199 * Normal Bayes Classifier *
200 \****************************************************************************************/
202 /* A structure representing the grid range of statmodel parameters.
203 It is used for optimizing statmodel accuracy by varying model parameters,
204 with the accuracy estimate computed by cross-validation.
205 The grid is logarithmic, so <step> must be greater than 1. */
// Normal Bayes classifier interface; objects are obtained from the static create() factory.
207 class CV_EXPORTS_W NormalBayesClassifier : public StatModel
// Parameter holder; its members are not visible in this excerpt.
210 class CV_EXPORTS_W Params
// Prediction variant with an additional outputProbs output array.
// NOTE(review): presumably per-class probabilities - confirm against the implementation.
215 virtual float predictProb( InputArray inputs, OutputArray outputs,
216 OutputArray outputProbs, int flags=0 ) const = 0;
217 virtual void setParams(const Params& params) = 0;
218 virtual Params getParams() const = 0;
// Factory; uses default-constructed Params when none are supplied.
220 static Ptr<NormalBayesClassifier> create(const Params& params=Params());
223 /****************************************************************************************\
224 * K-Nearest Neighbour Classifier *
225 \****************************************************************************************/
227 // k Nearest Neighbors
// k-nearest-neighbours model interface.
228 class CV_EXPORTS_W KNearest : public StatModel
// Parameters: defaultK and isclassifier, with constructor defaults 10 and true.
231 class CV_EXPORTS_W_MAP Params
234 Params(int defaultK=10, bool isclassifier=true);
236 CV_PROP_RW int defaultK;
237 CV_PROP_RW bool isclassifier;
239 virtual void setParams(const Params& p) = 0;
240 virtual Params getParams() const = 0;
// Neighbour query for the given samples with an explicit k. neighborResponses and
// dist are optional outputs (both default to noArray()).
// NOTE(review): part of this parameter list is not visible in this excerpt.
241 virtual float findNearest( InputArray samples, int k,
243 OutputArray neighborResponses=noArray(),
244 OutputArray dist=noArray() ) const = 0;
// Factory; uses default-constructed Params when none are supplied.
245 static Ptr<KNearest> create(const Params& params=Params());
248 /****************************************************************************************\
249 * Support Vector Machines *
250 \****************************************************************************************/
// Support vector machine interface.
253 class CV_EXPORTS_W SVM : public StatModel
// Training parameters. The full constructor takes every field explicitly.
256 class CV_EXPORTS_W_MAP Params
260 Params( int svm_type, int kernel_type,
261 double degree, double gamma, double coef0,
262 double Cvalue, double nu, double p,
263 const Mat& classWeights, TermCriteria termCrit );
// svmType / kernelType: presumably values from the SVM type and kernel enums declared
// further down in this class - confirm.
265 CV_PROP_RW int svmType;
266 CV_PROP_RW int kernelType;
267 CV_PROP_RW double gamma, coef0, degree;
269 CV_PROP_RW double C; // for C_SVC, EPS_SVR and NU_SVR (legacy names: CV_SVM_*)
270 CV_PROP_RW double nu; // for NU_SVC, ONE_CLASS and NU_SVR (legacy names: CV_SVM_*)
271 CV_PROP_RW double p; // for EPS_SVR (legacy name: CV_SVM_EPS_SVR)
272 CV_PROP_RW Mat classWeights; // for C_SVC (legacy name: CV_SVM_C_SVC)
273 CV_PROP_RW TermCriteria termCrit; // termination criteria
// Kernel interface: getType() reports the kernel id and calc() fills results.
// NOTE(review): presumably one result per vector in vecs, evaluated against another - confirm.
276 class CV_EXPORTS Kernel : public Algorithm
279 virtual int getType() const = 0;
280 virtual void calc( int vcount, int n, const float* vecs, const float* another, float* results ) = 0;
// SVM formulation ids.
284 enum { C_SVC=100, NU_SVC=101, ONE_CLASS=102, EPS_SVR=103, NU_SVR=104 };
// Kernel ids; CUSTOM is -1.
287 enum { CUSTOM=-1, LINEAR=0, POLY=1, RBF=2, SIGMOID=3, CHI2=4, INTER=5 };
// Parameter ids accepted by getDefaultGrid().
290 enum { C=0, GAMMA=1, P=2, NU=3, COEF=4, DEGREE=5 };
// Training with a grid per tunable parameter; each grid defaults to
// getDefaultGrid() for the matching id, kFold defaults to 10 and balanced to false.
// NOTE(review): presumably k-fold cross-validated parameter selection - confirm.
292 virtual bool trainAuto( const Ptr<TrainData>& data, int kFold = 10,
293 ParamGrid Cgrid = SVM::getDefaultGrid(SVM::C),
294 ParamGrid gammaGrid = SVM::getDefaultGrid(SVM::GAMMA),
295 ParamGrid pGrid = SVM::getDefaultGrid(SVM::P),
296 ParamGrid nuGrid = SVM::getDefaultGrid(SVM::NU),
297 ParamGrid coeffGrid = SVM::getDefaultGrid(SVM::COEF),
298 ParamGrid degreeGrid = SVM::getDefaultGrid(SVM::DEGREE),
299 bool balanced=false) = 0;
301 CV_WRAP virtual Mat getSupportVectors() const = 0;
// customKernel defaults to an empty Ptr.
303 virtual void setParams(const Params& p, const Ptr<Kernel>& customKernel=Ptr<Kernel>()) = 0;
304 virtual Params getParams() const = 0;
305 virtual Ptr<Kernel> getKernel() const = 0;
// Retrieves decision function i through the alpha and svidx outputs.
// NOTE(review): the meaning of the returned double is not stated here - confirm.
306 virtual double getDecisionFunction(int i, OutputArray alpha, OutputArray svidx) const = 0;
// param_id is one of the C / GAMMA / P / NU / COEF / DEGREE ids (see the trainAuto defaults).
308 static ParamGrid getDefaultGrid( int param_id );
// Factory; default-constructed Params and an empty custom kernel unless supplied.
309 static Ptr<SVM> create(const Params& p=Params(), const Ptr<Kernel>& customKernel=Ptr<Kernel>());
312 /****************************************************************************************\
313 * Expectation - Maximization *
314 \****************************************************************************************/
// Expectation-Maximization model interface.
315 class CV_EXPORTS_W EM : public StatModel
318 // Type of covariance matrices
319 enum {COV_MAT_SPHERICAL=0, COV_MAT_DIAGONAL=1, COV_MAT_GENERIC=2, COV_MAT_DEFAULT=COV_MAT_DIAGONAL};
321 // Default parameters
322 enum {DEFAULT_NCLUSTERS=5, DEFAULT_MAX_ITERS=100};
// Start-step ids.
// NOTE(review): presumably select which EM step runs first - confirm.
325 enum {START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0};
// Parameters. Constructor defaults: DEFAULT_NCLUSTERS clusters, diagonal covariance,
// and a COUNT+EPS termination criterion with DEFAULT_MAX_ITERS iterations and 1e-6.
327 class CV_EXPORTS_W_MAP Params
330 explicit Params(int nclusters=DEFAULT_NCLUSTERS, int covMatType=EM::COV_MAT_DIAGONAL,
331 const TermCriteria& termCrit=TermCriteria(TermCriteria::COUNT+TermCriteria::EPS,
332 EM::DEFAULT_MAX_ITERS, 1e-6));
333 CV_PROP_RW int nclusters;
334 CV_PROP_RW int covMatType;
335 CV_PROP_RW TermCriteria termCrit;
338 virtual void setParams(const Params& p) = 0;
339 virtual Params getParams() const = 0;
// Accessors for the model's weights, means and covariance matrices.
340 virtual Mat getWeights() const = 0;
341 virtual Mat getMeans() const = 0;
342 virtual void getCovs(std::vector<Mat>& covs) const = 0;
// Prediction for one sample returning a Vec2d and filling probs.
// NOTE(review): the meaning of the two Vec2d components is not stated here - confirm.
344 CV_WRAP virtual Vec2d predict2(InputArray sample, OutputArray probs) const = 0;
// Redeclared as pure virtual here (StatModel declares the same signature non-pure).
346 virtual bool train( const Ptr<TrainData>& trainData, int flags=0 ) = 0;
// Static convenience trainers returning a Ptr<EM>. All output arrays default to
// noArray() and params defaults to a default-constructed Params.
348 static Ptr<EM> train(InputArray samples,
349 OutputArray logLikelihoods=noArray(),
350 OutputArray labels=noArray(),
351 OutputArray probs=noArray(),
352 const Params& params=Params());
// Variant taking initial means (required) plus optional initial covariances and weights.
354 static Ptr<EM> train_startWithE(InputArray samples, InputArray means0,
355 InputArray covs0=noArray(),
356 InputArray weights0=noArray(),
357 OutputArray logLikelihoods=noArray(),
358 OutputArray labels=noArray(),
359 OutputArray probs=noArray(),
360 const Params& params=Params());
// Variant taking initial probabilities probs0.
362 static Ptr<EM> train_startWithM(InputArray samples, InputArray probs0,
363 OutputArray logLikelihoods=noArray(),
364 OutputArray labels=noArray(),
365 OutputArray probs=noArray(),
366 const Params& params=Params());
// Factory for an EM object with the given (or default) Params.
367 static Ptr<EM> create(const Params& params=Params());
371 /****************************************************************************************\
373 \****************************************************************************************/
// Decision-tree model interface; also the base class of RTrees and Boost in this header.
375 class CV_EXPORTS_W DTrees : public StatModel
// Prediction-mode flags stored in bits 8-9; PREDICT_MASK (3<<8) covers both bits.
378 enum { PREDICT_AUTO=0, PREDICT_SUM=(1<<8), PREDICT_MAX_VOTE=(2<<8), PREDICT_MASK=(3<<8) };
// Tree-building parameters.
// NOTE(review): the tail of the constructor's parameter list is not visible in this excerpt.
380 class CV_EXPORTS_W_MAP Params
384 Params( int maxDepth, int minSampleCount,
385 double regressionAccuracy, bool useSurrogates,
386 int maxCategories, int CVFolds,
387 bool use1SERule, bool truncatePrunedTree,
390 CV_PROP_RW int maxCategories;
391 CV_PROP_RW int maxDepth;
392 CV_PROP_RW int minSampleCount;
393 CV_PROP_RW int CVFolds;
394 CV_PROP_RW bool useSurrogates;
395 CV_PROP_RW bool use1SERule;
396 CV_PROP_RW bool truncatePrunedTree;
// NOTE(review): stored as float although the constructor takes a double - narrowing on assignment.
397 CV_PROP_RW float regressionAccuracy;
398 CV_PROP_RW Mat priors;
// Node and Split are the element types of getNodes() / getSplits(); their members
// are not visible in this excerpt.
401 class CV_EXPORTS Node
416 class CV_EXPORTS Split
// Unlike the accessors below, these two are virtual but not pure.
428 virtual void setDParams(const Params& p);
429 virtual Params getDParams() const;
// Read-only views of the tree storage, returned by const reference.
// NOTE(review): reference lifetime is presumably tied to the model object - confirm.
431 virtual const std::vector<int>& getRoots() const = 0;
432 virtual const std::vector<Node>& getNodes() const = 0;
433 virtual const std::vector<Split>& getSplits() const = 0;
434 virtual const std::vector<int>& getSubsets() const = 0;
// Factory; uses default-constructed Params when none are supplied.
436 static Ptr<DTrees> create(const Params& params=Params());
439 /****************************************************************************************\
440 * Random Trees Classifier *
441 \****************************************************************************************/
// Random-trees model interface, derived from DTrees.
443 class CV_EXPORTS_W RTrees : public DTrees
// Extends DTrees::Params with calcVarImportance, nactiveVars and termCrit.
446 class CV_EXPORTS_W_MAP Params : public DTrees::Params
450 Params( int maxDepth, int minSampleCount,
451 double regressionAccuracy, bool useSurrogates,
452 int maxCategories, const Mat& priors,
453 bool calcVarImportance, int nactiveVars,
454 TermCriteria termCrit );
456 CV_PROP_RW bool calcVarImportance; // true <=> RF processes variable importance
457 CV_PROP_RW int nactiveVars;
458 CV_PROP_RW TermCriteria termCrit;
// Accessors for the RTrees-specific parameter set.
461 virtual void setRParams(const Params& p) = 0;
462 virtual Params getRParams() const = 0;
// NOTE(review): presumably meaningful only when calcVarImportance was set - confirm.
464 virtual Mat getVarImportance() const = 0;
// Factory; uses default-constructed Params when none are supplied.
466 static Ptr<RTrees> create(const Params& params=Params());
469 /****************************************************************************************\
470 * Boosted tree classifier *
471 \****************************************************************************************/
// Boosted-trees model interface, derived from DTrees.
473 class CV_EXPORTS_W Boost : public DTrees
// Extends DTrees::Params with boostType, weakCount and weightTrimRate.
476 class CV_EXPORTS_W_MAP Params : public DTrees::Params
// NOTE(review): boostType presumably holds one of DISCRETE / REAL / LOGIT / GENTLE - confirm.
479 CV_PROP_RW int boostType;
480 CV_PROP_RW int weakCount;
481 CV_PROP_RW double weightTrimRate;
484 Params( int boostType, int weakCount, double weightTrimRate,
485 int maxDepth, bool useSurrogates, const Mat& priors );
// Boosting variant ids.
489 enum { DISCRETE=0, REAL=1, LOGIT=2, GENTLE=3 };
// Accessors for the Boost-specific parameter set.
491 virtual Params getBParams() const = 0;
492 virtual void setBParams(const Params& p) = 0;
// Factory; uses default-constructed Params when none are supplied.
494 static Ptr<Boost> create(const Params& params=Params());
497 /****************************************************************************************\
498 * Gradient Boosted Trees *
499 \****************************************************************************************/
501 /*class CV_EXPORTS_W GBTrees : public DTrees
504 struct CV_EXPORTS_W_MAP Params : public DTrees::Params
506 CV_PROP_RW int weakCount;
507 CV_PROP_RW int lossFunctionType;
508 CV_PROP_RW float subsamplePortion;
509 CV_PROP_RW float shrinkage;
512 Params( int lossFunctionType, int weakCount, float shrinkage,
513 float subsamplePortion, int maxDepth, bool useSurrogates );
516 enum {SQUARED_LOSS=0, ABSOLUTE_LOSS, HUBER_LOSS=3, DEVIANCE_LOSS};
518 virtual void setK(int k) = 0;
520 virtual float predictSerial( InputArray samples,
521 OutputArray weakResponses, int flags) const = 0;
523 static Ptr<GBTrees> create(const Params& p);
526 /****************************************************************************************\
527 * Artificial Neural Networks (ANN) *
528 \****************************************************************************************/
530 /////////////////////////////////// Multi-Layer Perceptrons //////////////////////////////
// Multi-layer perceptron model interface.
532 class CV_EXPORTS_W ANN_MLP : public StatModel
// Network and training parameters; param2 defaults to 0 in the full constructor.
// NOTE(review): how param1/param2 map onto the bp*/rp* fields is defined in the
// implementation, not in this header.
535 struct CV_EXPORTS_W_MAP Params
538 Params( const Mat& layerSizes, int activateFunc, double fparam1, double fparam2,
539 TermCriteria termCrit, int trainMethod, double param1, double param2=0 );
// Training-method ids (presumably the values stored in trainMethod - confirm).
541 enum { BACKPROP=0, RPROP=1 };
543 CV_PROP_RW Mat layerSizes;
544 CV_PROP_RW int activateFunc;
545 CV_PROP_RW double fparam1;
546 CV_PROP_RW double fparam2;
548 CV_PROP_RW TermCriteria termCrit;
549 CV_PROP_RW int trainMethod;
551 // backpropagation parameters
552 CV_PROP_RW double bpDWScale, bpMomentScale;
// NOTE(review): the rp* fields presumably configure the RPROP method - confirm.
555 CV_PROP_RW double rpDW0, rpDWPlus, rpDWMinus, rpDWMin, rpDWMax;
558 // possible activation functions
559 enum { IDENTITY = 0, SIGMOID_SYM = 1, GAUSSIAN = 2 };
561 // available training flags
562 enum { UPDATE_WEIGHTS = 1, NO_INPUT_SCALE = 2, NO_OUTPUT_SCALE = 4 };
// Returns a Mat for the layer selected by layerIdx.
564 virtual Mat getWeights(int layerIdx) const = 0;
565 virtual void setParams(const Params& p) = 0;
566 virtual Params getParams() const = 0;
// Factory; uses default-constructed Params when none are supplied.
568 static Ptr<ANN_MLP> create(const Params& params=Params());
571 /****************************************************************************************\
572 * Logistic Regression *
573 \****************************************************************************************/
// Logistic regression model interface.
575 class CV_EXPORTS LogisticRegression : public StatModel
// Training parameters.
// NOTE(review): only part of the constructor's parameter list and of the fields is
// visible in this excerpt. Visible defaults: learning_rate=0.001, method=BATCH,
// normlization=REG_L2. The parameter name "normlization" is misspelled (sic).
578 class CV_EXPORTS Params
581 Params(double learning_rate = 0.001,
583 int method = LogisticRegression::BATCH,
584 int normlization = LogisticRegression::REG_L2,
593 cv::TermCriteria term_crit;
// Regularisation kinds and training-method ids used as constructor defaults above.
596 enum { REG_L1 = 0, REG_L2 = 1};
597 enum { BATCH = 0, MINI_BATCH = 1};
599 // Algorithm interface
600 virtual void write( FileStorage &fs ) const = 0;
601 virtual void read( const FileNode &fn ) = 0;
603 // StatModel interface
604 virtual bool train( const Ptr<TrainData>& trainData, int flags=0 ) = 0;
605 virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;
606 virtual void clear() = 0;
// NOTE(review): presumably returns the learnt coefficient matrix - confirm.
608 virtual Mat get_learnt_thetas() const = 0;
// Factory; uses default-constructed Params when none are supplied.
610 static Ptr<LogisticRegression> create( const Params& params = Params() );
613 /****************************************************************************************\
614 * Auxiliary function declarations *
615 \****************************************************************************************/
617 /* Generates <samples> from a multivariate normal distribution, where <mean> is the
618 average row vector and <cov> is the symmetric covariance matrix */
619 CV_EXPORTS void randMVNormal( InputArray mean, InputArray cov, int nsamples, OutputArray samples);
621 /* Generates samples from a Gaussian mixture distribution */
// NOTE(review): sampClasses presumably receives the source component of each sample - confirm.
622 CV_EXPORTS void randGaussMixture( InputArray means, InputArray covs, InputArray weights,
623 int nsamples, OutputArray samples, OutputArray sampClasses );
625 /* Creates a test set */
// Outputs are written to <samples> and <responses>.
// NOTE(review): the geometry implied by the name (concentric spheres) is defined in the implementation.
626 CV_EXPORTS void createConcentricSpheresTestSet( int nsamples, int nfeatures, int nclasses,
627 OutputArray samples, OutputArray responses);
632 #endif // __cplusplus
633 #endif // __OPENCV_ML_HPP__