1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
10 // Intel License Agreement
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
26 // * The name of Intel Corporation may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
48 #ifndef _CVCLASSIFIER_H_
49 #define _CVCLASSIFIER_H_
56 /* View a 1-row or 1-column matrix <mat> as a vector: asserts that shape, sets <vdata> to the data pointer and <vstep> to the element size (single row) or to (mat).step */
57 #define CV_MAT2VEC( mat, vdata, vstep, num ) \
58 assert( (mat).rows == 1 || (mat).cols == 1 ); \
59 (vdata) = ((mat).data.ptr); \
60 if( (mat).rows == 1 ) \
62 (vstep) = CV_ELEM_SIZE( (mat).type ); \
67 (vstep) = (mat).step; \
71 /* Initialize the <sample> matrix header to refer to sample number <num> of the <trainData> sample matrix: a 1 x cols row when CV_IS_ROW_SAMPLE( tdflags ) holds, a rows x 1 column in the other case */
72 #define CV_GET_SAMPLE( trainData, tdflags, num, sample ) \
73 if( CV_IS_ROW_SAMPLE( tdflags ) ) \
75 cvInitMatHeader( &(sample), 1, (trainData).cols, \
76 CV_MAT_TYPE( (trainData).type ), \
77 ((trainData).data.ptr + (num) * (trainData).step), \
82 cvInitMatHeader( &(sample), (trainData).rows, 1, \
83 CV_MAT_TYPE( (trainData).type ), \
84 ((trainData).data.ptr + (num) * CV_ELEM_SIZE( (trainData).type )), \
88 #define CV_GET_SAMPLE_STEP( trainData, tdflags, sstep ) \
89 (sstep) = ( ( CV_IS_ROW_SAMPLE( tdflags ) ) \
90 ? (trainData).step : CV_ELEM_SIZE( (trainData).type ) );
93 #define CV_LOGRATIO_THRESHOLD 0.00001F
95 /* Log-ratio (logit): log( tval / (1 - tval) ), where tval is val limited to [CV_LOGRATIO_THRESHOLD, 1 - CV_LOGRATIO_THRESHOLD] */
96 CV_INLINE float cvLogRatio( float val );
98 CV_INLINE float cvLogRatio( float val )
102 tval = MAX(CV_LOGRATIO_THRESHOLD, MIN( 1.0F - CV_LOGRATIO_THRESHOLD, (val) ));
103 return logf( tval / (1.0F - tval) );
107 /* flag values for the classifier constructor's flags parameter */
109 /* each trainData matrix column is a sample */
110 #define CV_COL_SAMPLE 0
112 /* each trainData matrix row is a sample */
113 #define CV_ROW_SAMPLE 1
115 #ifndef CV_IS_ROW_SAMPLE
116 # define CV_IS_ROW_SAMPLE( flags ) ( ( flags ) & CV_ROW_SAMPLE )
119 /* Classifier supports tune function */
120 #define CV_TUNABLE (1 << 1)
122 #define CV_IS_TUNABLE( flags ) ( (flags) & CV_TUNABLE )
125 /* classifier fields common to all classifiers */
126 #define CV_CLASSIFIER_FIELDS() \
128 float(*eval)( struct CvClassifier*, CvMat* ); \
129 void (*tune)( struct CvClassifier*, CvMat*, int flags, CvMat*, CvMat*, CvMat*, \
131 int (*save)( struct CvClassifier*, const char* file_name ); \
132 void (*release)( struct CvClassifier** );
134 typedef struct CvClassifier
136 CV_CLASSIFIER_FIELDS()
139 #define CV_CLASSIFIER_TRAIN_PARAM_FIELDS()
140 typedef struct CvClassifierTrainParams
142 CV_CLASSIFIER_TRAIN_PARAM_FIELDS()
143 } CvClassifierTrainParams;
147 Common classifier constructor:
148 CvClassifier* cvCreateMyClassifier( CvMat* trainData,
152 CvMat* missedMeasurementsMask CV_DEFAULT(0),
153 CvCompIdx* compIdx CV_DEFAULT(0),
154 CvMat* sampleIdx CV_DEFAULT(0),
155 CvMat* weights CV_DEFAULT(0),
156 CvClassifierTrainParams* trainParams CV_DEFAULT(0)
161 typedef CvClassifier* (*CvClassifierConstructor)( CvMat*, int, CvMat*, CvMat*, CvMat*,
162 CvMat*, CvMat*, CvMat*,
163 CvClassifierTrainParams* );
165 typedef enum CvStumpType
167 CV_CLASSIFICATION = 0,
168 CV_CLASSIFICATION_CLASS = 1,
172 typedef enum CvStumpError
174 CV_MISCLASSIFICATION = 0,
181 typedef struct CvStumpTrainParams
183 CV_CLASSIFIER_TRAIN_PARAM_FIELDS()
186 } CvStumpTrainParams;
188 typedef struct CvMTStumpTrainParams
190 CV_CLASSIFIER_TRAIN_PARAM_FIELDS()
193 int portion; /* number of components calculated in each thread */
194 int numcomp; /* total number of components */
196 /* callback which fills <mat> with components [first, first+num[ */
197 void (*getTrainData)( CvMat* mat, CvMat* sampleIdx, CvMat* compIdx,
198 int first, int num, void* userdata );
199 CvMat* sortedIdx; /* presorted samples indices */
200 void* userdata; /* passed to callback */
201 } CvMTStumpTrainParams;
203 typedef struct CvStumpClassifier
205 CV_CLASSIFIER_FIELDS()
208 float lerror; /* impurity of the left node -- NOTE(review): comment previously said "right"; field name suggests left, confirm against the stump trainer */
209 float rerror; /* impurity of the right node -- NOTE(review): comment previously said "left"; field name suggests right, confirm against the stump trainer */
216 typedef struct CvCARTTrainParams
218 CV_CLASSIFIER_TRAIN_PARAM_FIELDS()
219 /* desired number of internal nodes */
221 CvClassifierTrainParams* stumpTrainParams;
222 CvClassifierConstructor stumpConstructor;
225 * Split sample indices <idx>
226 * on the "left" indices <left> and "right" indices <right>
227 * according to samples components <compidx> values and <threshold>.
229 * NOTE: Matrices <left> and <right> must be allocated using cvCreateMat function
230 * since they are freed using cvReleaseMat function
232 * If it is NULL then the default implementation which evaluates training
233 * samples from <trainData> passed to classifier constructor is used
235 void (*splitIdx)( int compidx, float threshold,
236 CvMat* idx, CvMat** left, CvMat** right,
241 typedef struct CvCARTClassifier
243 CV_CLASSIFIER_FIELDS()
244 /* number of internal nodes */
247 /* internal nodes (each array of <count> elements) */
253 /* leaves (array of <count>+1 elements) */
258 void cvGetSortedIndices( CvMat* val, CvMat* idx, int sortcols CV_DEFAULT( 0 ) );
261 void cvReleaseStumpClassifier( CvClassifier** classifier );
264 float cvEvalStumpClassifier( CvClassifier* classifier, CvMat* sample );
267 CvClassifier* cvCreateStumpClassifier( CvMat* trainData,
271 CvMat* missedMeasurementsMask CV_DEFAULT(0),
272 CvMat* compIdx CV_DEFAULT(0),
273 CvMat* sampleIdx CV_DEFAULT(0),
274 CvMat* weights CV_DEFAULT(0),
275 CvClassifierTrainParams* trainParams CV_DEFAULT(0) );
278 * cvCreateMTStumpClassifier
280 * Multithreaded stump classifier constructor
281 * Includes huge train data support through callback function
284 CvClassifier* cvCreateMTStumpClassifier( CvMat* trainData,
288 CvMat* missedMeasurementsMask,
292 CvClassifierTrainParams* trainParams );
295 * cvCreateCARTClassifier
297 * CART classifier constructor
300 CvClassifier* cvCreateCARTClassifier( CvMat* trainData,
304 CvMat* missedMeasurementsMask,
308 CvClassifierTrainParams* trainParams );
311 void cvReleaseCARTClassifier( CvClassifier** classifier );
314 float cvEvalCARTClassifier( CvClassifier* classifier, CvMat* sample );
316 /****************************************************************************************\
318 \****************************************************************************************/
323 * The CvBoostType enumeration specifies the boosting type.
326 * Four different boosting variants for 2 class classification problems are supported:
327 * Discrete AdaBoost, Real AdaBoost, LogitBoost and Gentle AdaBoost.
328 * The L2 (2 class classification problems) and LK (K class classification problems)
329 * algorithms are close to LogitBoost but are more numerically stable than it.
330 * For regression three different loss functions are supported:
331 * Least squares, least absolute deviation and Huber loss.
333 typedef enum CvBoostType
335 CV_DABCLASS = 0, /* 2 class Discrete AdaBoost */
336 CV_RABCLASS = 1, /* 2 class Real AdaBoost */
337 CV_LBCLASS = 2, /* 2 class LogitBoost */
338 CV_GABCLASS = 3, /* 2 class Gentle AdaBoost */
339 CV_L2CLASS = 4, /* classification (2 class problem) */
340 CV_LKCLASS = 5, /* classification (K class problem) */
341 CV_LSREG = 6, /* least squares regression */
342 CV_LADREG = 7, /* least absolute deviation regression */
343 CV_MREG = 8 /* M-regression (Huber loss) */
346 /****************************************************************************************\
347 * Iterative training functions *
348 \****************************************************************************************/
353 * The CvBoostTrainer structure represents internal boosting trainer.
355 typedef struct CvBoostTrainer CvBoostTrainer;
358 * cvBoostStartTraining
360 * The cvBoostStartTraining function starts training process and calculates
361 * response values and weights for the first weak classifier training.
365 * Vector of classes of the training samples. Each element must be 0 or 1 and
368 * Vector of response values for the first trained weak classifier.
369 * Must be of type CV_32FC1.
371 * Weight vector of training samples for the first trained weak classifier.
372 * Must be of type CV_32FC1.
374 * Boosting type. CV_DABCLASS, CV_RABCLASS, CV_LBCLASS, CV_GABCLASS
375 * types are supported.
378 * The return value is a pointer to internal trainer structure which is used
379 * to perform next training iterations.
382 * weakTrainVals and weights must be allocated before calling the function
383 * and of the same size as trainingClasses. Usually weights should be initialized
385 * The function calculates response values and weights for the first weak
386 * classifier training and stores them into weakTrainVals and weights
388 * Note, the training of the weak classifier using weakTrainVals, weight,
389 * trainingData is outside of this function.
392 CvBoostTrainer* cvBoostStartTraining( CvMat* trainClasses,
393 CvMat* weakTrainVals,
398 * cvBoostNextWeakClassifier
400 * The cvBoostNextWeakClassifier function performs the next training
401 * iteration and calculates response values and weights for the next weak
402 * classifier training.
406 * Vector of values obtained by evaluation of each sample with
407 * the last trained weak classifier (iteration i). Must be of CV_32FC1 type.
409 * Vector of classes of training samples. Each element must be 0 or 1,
410 * and of type CV_32FC1.
412 * Vector of response values for the next weak classifier training
413 * (iteration i+1). Must be of type CV_32FC1.
415 * Weight vector of training samples for the next weak classifier training
416 * (iteration i+1). Must be of type CV_32FC1.
418 * A pointer to internal trainer returned by the cvBoostStartTraining
422 * The return value is the coefficient for the last trained weak classifier.
425 * weakTrainVals and weights must be exactly the same vectors as used in
426 * the cvBoostStartTraining function call and should not be modified.
427 * The function calculates response values and weights for the next weak
428 * classifier training and stores them into weakTrainVals and weights
430 * Note, the training of the weak classifier of iteration i+1 using
431 * weakTrainVals, weight, trainingData is outside of this function.
434 float cvBoostNextWeakClassifier( CvMat* weakEvalVals,
436 CvMat* weakTrainVals,
438 CvBoostTrainer* trainer );
443 * The cvBoostEndTraining function finishes training process and releases
444 * internally allocated memory.
448 * A pointer to a pointer to internal trainer returned by the cvBoostStartTraining
452 void cvBoostEndTraining( CvBoostTrainer** trainer );
454 /****************************************************************************************\
455 * Boosted tree models *
456 \****************************************************************************************/
461 * The CvBtClassifier structure represents boosted tree model.
465 * Flags. If CV_IS_TUNABLE( flags ) != 0 then the model supports tuning.
467 * Evaluation function. Returns sample predicted class (0, 1, etc.)
468 * for classification or predicted value for regression.
470 * Tune function. If the model supports tuning, a tune call performs
471 * one more boosting iteration when the flags parameter passed to it
472 * is CV_TUNABLE; otherwise it releases the memory allocated internally for tuning
473 * and makes the model untunable.
474 * NOTE: Since tuning uses the pointers to parameters,
475 * passed to the cvCreateBtClassifier function, they should not be modified
476 * or released between tune calls.
478 * This function stores the model into given file.
480 * This function releases the model.
482 * Boosted tree model type.
484 * Number of classes for CV_LKCLASS type or 1 for all other types.
486 * Number of iterations. Number of weak classifiers is equal to number
487 * of iterations for all types except CV_LKCLASS. For CV_LKCLASS type
488 * number of weak classifiers is (numiter * numclasses).
490 * Number of features in sample.
492 * Stores weak classifiers when the model does not support tuning.
494 * Stores weak classifiers when the model supports tuning.
496 * Pointer to internal tuning parameters if the model supports tuning.
498 typedef struct CvBtClassifier
500 CV_CLASSIFIER_FIELDS()
508 CvCARTClassifier** trees;
515 * CvBtClassifierTrainParams
517 * The CvBtClassifierTrainParams structure stores training parameters for
518 * boosted tree model.
522 * Boosted tree model type.
524 * Desired number of iterations.
526 * Parameter Model Type Parameter Meaning
527 * param[0] Any Shrinkage factor
528 * param[1] CV_MREG alpha. (1-alpha) determines "break-down" point of
529 * the training procedure, i.e. the fraction of samples
530 * that can be arbitrarily modified without seriously
531 * degrading the quality of the result.
532 * CV_DABCLASS, Weight trimming factor.
539 * Desired number of splits in each tree.
541 typedef struct CvBtClassifierTrainParams
543 CV_CLASSIFIER_TRAIN_PARAM_FIELDS()
549 } CvBtClassifierTrainParams;
552 * cvCreateBtClassifier
554 * The cvCreateBtClassifier function creates boosted tree model.
558 * Matrix of feature values. Must have CV_32FC1 type.
560 * Determines how samples are stored in trainData.
561 * One of CV_ROW_SAMPLE or CV_COL_SAMPLE.
562 * Optionally may be combined with CV_TUNABLE to make tunable model.
564 * Vector of responses for regression or classes (0, 1, 2, etc.) for classification.
566 * missedMeasurementsMask,
568 * Not supported. Must be NULL.
570 * Indices of samples used in training. If NULL then all samples are used.
571 * For CV_DABCLASS, CV_RABCLASS, CV_LBCLASS and CV_GABCLASS must be NULL.
573 * Not supported. Must be NULL.
575 * A pointer to CvBtClassifierTrainParams structure. Training parameters.
576 * See CvBtClassifierTrainParams description for details.
579 * The return value is a pointer to created boosted tree model of type CvBtClassifier.
582 * The function performs trainParams->numiter training iterations.
583 * If CV_TUNABLE flag is specified then created model supports tuning.
584 * In this case additional training iterations may be performed by
585 * tune function call.
588 CvClassifier* cvCreateBtClassifier( CvMat* trainData,
592 CvMat* missedMeasurementsMask,
596 CvClassifierTrainParams* trainParams );
599 * cvCreateBtClassifierFromFile
601 * The cvCreateBtClassifierFromFile function restores previously saved
602 * boosted tree model from file.
606 * The name of the file with boosted tree model.
609 * The restored model does not support tuning.
612 CvClassifier* cvCreateBtClassifierFromFile( const char* filename );
614 /****************************************************************************************\
615 * Utility functions *
616 \****************************************************************************************/
621 * The cvTrimWeights function performs weight trimming.
627 * Indices vector of weights that should be considered.
628 * If it is NULL then all weights are used.
630 * Weight trimming factor. Must be in [0, 1] range.
633 * The return value is a vector of indices. If all samples should be used then
634 * it is equal to idx. Otherwise the cvReleaseMat function should be called
640 CvMat* cvTrimWeights( CvMat* weights, CvMat* idx, float factor );
645 * The cvReadTrainData function reads feature values and responses from file.
649 * The name of the file to be read.
651 * One of CV_ROW_SAMPLE or CV_COL_SAMPLE. Determines how feature values
654 * A pointer to a pointer to created matrix with feature values.
655 * cvReleaseMat function should be used to destroy created matrix.
657 * A pointer to a pointer to created matrix with response values.
658 * cvReleaseMat function should be used to destroy created matrix.
662 * ============================================
664 * value_1_1 value_1_2 ... value_1_n response_1
665 * value_2_1 value_2_2 ... value_2_n response_2
667 * value_m_1 value_m_2 ... value_m_n response_m
668 * ============================================
672 * Number of features in each sample
674 * Value of j-th feature of i-th sample
676 * Response value of i-th sample
677 * For classification problems responses represent classes (0, 1, etc.)
678 * All values and classes are integer or real numbers.
681 void cvReadTrainData( const char* filename,
684 CvMat** trainClasses );
690 * The cvWriteTrainData function stores feature values and responses into file.
694 * The name of the file.
696 * One of CV_ROW_SAMPLE or CV_COL_SAMPLE. Determines how feature values
699 * Feature values matrix.
701 * Response values vector.
703 * Vector of indices of the samples that should be stored. If it is NULL
704 * then all samples will be stored.
707 * See the cvReadTrainData function for file format description.
710 void cvWriteTrainData( const char* filename,
719 * The cvRandShuffleVec function performs random shuffling of the given vector.
723 * Vector that should be shuffled.
724 * Must have CV_8UC1, CV_16SC1, CV_32SC1 or CV_32FC1 type.
727 void cvRandShuffleVec( CvMat* vector );
729 #endif /* _CVCLASSIFIER_H_ */