}
float low_pass_gain; // low pass gain
- CvEMParams em_params; // EM parameters
+ cv::EM::Params em_params; // EM parameters
};
// Mean Shift Tracker parameters for specifying use of HSV channel and CamShift parameters.
float ms_tracker_weight;
CvFeatureTrackerParams ft_params;
CvMeanShiftTrackerParams ms_params;
- CvEMParams em_params;
+ cv::EM::Params em_params;
int motion_model;
float low_pass_gain;
};
CvMat* samples;
CvMat* labels;
- CvEM em_model;
+ cv::EM em_model;
Rect prev_window;
Point2f prev_center;
params.em_params.probs = NULL;
params.em_params.nclusters = 1;
params.em_params.weights = NULL;
- params.em_params.cov_mat_type = CvEM::COV_MAT_SPHERICAL;
- params.em_params.start_step = CvEM::START_AUTO_STEP;
- params.em_params.term_crit.max_iter = 10000;
- params.em_params.term_crit.epsilon = 0.001;
- params.em_params.term_crit.type = CV_TERMCRIT_ITER | CV_TERMCRIT_EPS;
+ params.em_params.covMatType = cv::EM::COV_MAT_SPHERICAL;
+ params.em_params.startStep = cv::EM::START_AUTO_STEP;
+ params.em_params.termCrit.maxCount = 10000;
+ params.em_params.termCrit.epsilon = 0.001;
+ params.em_params.termCrit.type = cv::TermCriteria::COUNT + cv::TermCriteria::EPS;
samples = cvCreateMat(2, 1, CV_32FC1);
labels = cvCreateMat(2, 1, CV_32SC1);
count++;
}
- em_model.train(samples, 0, params.em_params, labels);
+ cv::Mat lbls;
+ em_model.train(samples, cv::Mat(), params.em_params, &lbls);
+ if(labels)
+ *labels = lbls;
curr_center.x = (float)em_model.getMeans().at<double> (0, 0);
curr_center.y = (float)em_model.getMeans().at<double> (0, 1);
-ocv_define_module(legacy opencv_calib3d opencv_highgui opencv_video)
+ocv_define_module(legacy opencv_calib3d opencv_highgui opencv_video opencv_ml)
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/features2d/features2d.hpp"
#include "opencv2/calib3d/calib3d.hpp"
+#include "opencv2/ml/ml.hpp"
#ifdef __cplusplus
extern "C" {
IplImage* m_mask;
};
+/****************************************************************************************\
+* Expectation - Maximization *
+\****************************************************************************************/
+struct CV_EXPORTS_W_MAP CvEMParams
+{
+ CvEMParams();
+ CvEMParams( int nclusters, int cov_mat_type=1/*CvEM::COV_MAT_DIAGONAL*/,
+ int start_step=0/*CvEM::START_AUTO_STEP*/,
+ CvTermCriteria term_crit=cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 100, FLT_EPSILON),
+ const CvMat* probs=0, const CvMat* weights=0, const CvMat* means=0, const CvMat** covs=0 );
+
+ CV_PROP_RW int nclusters;
+ CV_PROP_RW int cov_mat_type;
+ CV_PROP_RW int start_step;
+ const CvMat* probs;
+ const CvMat* weights;
+ const CvMat* means;
+ const CvMat** covs;
+ CV_PROP_RW CvTermCriteria term_crit;
+};
+
+
+class CV_EXPORTS_W CvEM : public CvStatModel
+{
+public:
+ // Type of covariation matrices
+ enum { COV_MAT_SPHERICAL=cv::EM::COV_MAT_SPHERICAL,
+ COV_MAT_DIAGONAL =cv::EM::COV_MAT_DIAGONAL,
+ COV_MAT_GENERIC =cv::EM::COV_MAT_GENERIC };
+
+ // The initial step
+ enum { START_E_STEP=cv::EM::START_E_STEP,
+ START_M_STEP=cv::EM::START_M_STEP,
+ START_AUTO_STEP=cv::EM::START_AUTO_STEP };
+
+ CV_WRAP CvEM();
+ CvEM( const CvMat* samples, const CvMat* sampleIdx=0,
+ CvEMParams params=CvEMParams(), CvMat* labels=0 );
+
+ virtual ~CvEM();
+
+ virtual bool train( const CvMat* samples, const CvMat* sampleIdx=0,
+ CvEMParams params=CvEMParams(), CvMat* labels=0 );
+
+ virtual float predict( const CvMat* sample, CV_OUT CvMat* probs, bool isNormalize=true ) const;
+
+#ifndef SWIG
+ CV_WRAP CvEM( const cv::Mat& samples, const cv::Mat& sampleIdx=cv::Mat(),
+ CvEMParams params=CvEMParams() );
+
+ CV_WRAP virtual bool train( const cv::Mat& samples,
+ const cv::Mat& sampleIdx=cv::Mat(),
+ CvEMParams params=CvEMParams(),
+ CV_OUT cv::Mat* labels=0 );
+
+ CV_WRAP virtual float predict( const cv::Mat& sample, CV_OUT cv::Mat* probs=0, bool isNormalize=true ) const;
+ CV_WRAP virtual double calcLikelihood( const cv::Mat &sample ) const;
+
+ CV_WRAP int getNClusters() const;
+ CV_WRAP const cv::Mat& getMeans() const;
+ CV_WRAP void getCovs(CV_OUT std::vector<cv::Mat>& covs) const;
+ CV_WRAP const cv::Mat& getWeights() const;
+ CV_WRAP const cv::Mat& getProbs() const;
+
+ CV_WRAP inline double getLikelihood() const { return emObj.isTrained() ? likelihood : DBL_MAX; }
+#endif
+
+ CV_WRAP virtual void clear();
+
+ int get_nclusters() const;
+ const CvMat* get_means() const;
+ const CvMat** get_covs() const;
+ const CvMat* get_weights() const;
+ const CvMat* get_probs() const;
+
+ inline double get_log_likelihood() const { return getLikelihood(); }
+
+ virtual void read( CvFileStorage* fs, CvFileNode* node );
+ virtual void write( CvFileStorage* fs, const char* name ) const;
+
+protected:
+ void set_mat_hdrs();
+
+ cv::EM emObj;
+ cv::Mat probs;
+ double likelihood;
+
+ CvMat meansHdr;
+ std::vector<CvMat> covsHdrs;
+ std::vector<CvMat*> covsPtrs;
+ CvMat weightsHdr;
+ CvMat probsHdr;
+};
namespace cv
{
+typedef CvEMParams EMParams;
+typedef CvEM ExpectationMaximization;
+
/*!
The Patch Generator class
*/
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// Intel License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright( C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+//(including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort(including negligence or otherwise) arising in any way out of
+// the use of this software, even ifadvised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+CvEMParams::CvEMParams() : nclusters(10), cov_mat_type(CvEM::COV_MAT_DIAGONAL),
+ start_step(CvEM::START_AUTO_STEP), probs(0), weights(0), means(0), covs(0)
+{
+ term_crit=cvTermCriteria( CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 100, FLT_EPSILON );
+}
+
+CvEMParams::CvEMParams( int _nclusters, int _cov_mat_type, int _start_step,
+ CvTermCriteria _term_crit, const CvMat* _probs,
+ const CvMat* _weights, const CvMat* _means, const CvMat** _covs ) :
+ nclusters(_nclusters), cov_mat_type(_cov_mat_type), start_step(_start_step),
+ probs(_probs), weights(_weights), means(_means), covs(_covs), term_crit(_term_crit)
+{}
+
+CvEM::CvEM() : likelihood(DBL_MAX)
+{
+}
+
+CvEM::CvEM( const CvMat* samples, const CvMat* sample_idx,
+ CvEMParams params, CvMat* labels ) : likelihood(DBL_MAX)
+{
+ train(samples, sample_idx, params, labels);
+}
+
+CvEM::~CvEM()
+{
+ clear();
+}
+
+void CvEM::clear()
+{
+ emObj.clear();
+}
+
+void CvEM::read( CvFileStorage* fs, CvFileNode* node )
+{
+ cv::FileNode fn(fs, node);
+ emObj.read(fn);
+ set_mat_hdrs();
+}
+
+void CvEM::write( CvFileStorage* _fs, const char* name ) const
+{
+ cv::FileStorage fs = _fs;
+ if(name)
+ fs << name << "{";
+ emObj.write(fs);
+ if(name)
+ fs << "}";
+}
+
+double CvEM::calcLikelihood( const cv::Mat &input_sample ) const
+{
+ double likelihood;
+ emObj.predict(input_sample, 0, &likelihood);
+ return likelihood;
+}
+
+float
+CvEM::predict( const CvMat* _sample, CvMat* _probs, bool isNormalize ) const
+{
+ cv::Mat prbs;
+ int cls = emObj.predict(_sample, _probs ? &prbs : 0);
+ if(_probs)
+ {
+ if(isNormalize)
+ cv::normalize(prbs, prbs, 1, 0, cv::NORM_L1);
+ *_probs = prbs;
+ }
+ return (float)cls;
+}
+
+void CvEM::set_mat_hdrs()
+{
+ if(emObj.isTrained())
+ {
+ meansHdr = emObj.getMeans();
+ covsHdrs.resize(emObj.getNClusters());
+ covsPtrs.resize(emObj.getNClusters());
+ const std::vector<cv::Mat>& covs = emObj.getCovs();
+ for(size_t i = 0; i < covsHdrs.size(); i++)
+ {
+ covsHdrs[i] = covs[i];
+ covsPtrs[i] = &covsHdrs[i];
+ }
+ weightsHdr = emObj.getWeights();
+ probsHdr = probs;
+ }
+}
+
+static
+void init_params(const CvEMParams& src, cv::EM::Params& dst,
+ cv::Mat& prbs, cv::Mat& weights,
+ cv::Mat& means, cv::vector<cv::Mat>& covsHdrs)
+{
+ dst.nclusters = src.nclusters;
+ dst.covMatType = src.cov_mat_type;
+ dst.startStep = src.start_step;
+ dst.termCrit = src.term_crit;
+
+ prbs = src.probs;
+ dst.probs = &prbs;
+
+ weights = src.weights;
+ dst.weights = &weights;
+
+ means = src.means;
+ dst.means = &means;
+
+ if(src.covs)
+ {
+ covsHdrs.resize(src.nclusters);
+ for(size_t i = 0; i < covsHdrs.size(); i++)
+ covsHdrs[i] = src.covs[i];
+ dst.covs = &covsHdrs;
+ }
+}
+
+bool CvEM::train( const CvMat* _samples, const CvMat* _sample_idx,
+ CvEMParams _params, CvMat* _labels )
+{
+ cv::EM::Params params;
+ cv::Mat prbs, weights, means;
+ std::vector<cv::Mat> covsHdrs;
+ init_params(_params, params, prbs, weights, means, covsHdrs);
+
+ cv::Mat lbls;
+ cv::Mat likelihoods;
+ bool isOk = emObj.train(_samples, _sample_idx, params, _labels ? &lbls : 0, &probs, &likelihoods );
+ if(isOk)
+ {
+ if(_labels)
+ *_labels = lbls;
+ likelihood = cv::sum(likelihoods)[0];
+ set_mat_hdrs();
+ }
+
+ return isOk;
+}
+
+int CvEM::get_nclusters() const
+{
+ return emObj.getNClusters();
+}
+
+const CvMat* CvEM::get_means() const
+{
+ return emObj.isTrained() ? &meansHdr : 0;
+}
+
+const CvMat** CvEM::get_covs() const
+{
+ return emObj.isTrained() ? (const CvMat**)&covsPtrs[0] : 0;
+}
+
+const CvMat* CvEM::get_weights() const
+{
+ return emObj.isTrained() ? &weightsHdr : 0;
+}
+
+const CvMat* CvEM::get_probs() const
+{
+ return emObj.isTrained() ? &probsHdr : 0;
+}
+
+using namespace cv;
+
+CvEM::CvEM( const Mat& samples, const Mat& sample_idx, CvEMParams params )
+{
+ train(samples, sample_idx, params, 0);
+}
+
+bool CvEM::train( const Mat& _samples, const Mat& _sample_idx,
+ CvEMParams _params, Mat* _labels )
+{
+ cv::EM::Params params;
+ cv::Mat prbs, weights, means;
+ std::vector<cv::Mat> covsHdrs;
+ init_params(_params, params, prbs, weights, means, covsHdrs);
+
+ cv::Mat likelihoods;
+ bool isOk = emObj.train(_samples, _sample_idx, params, _labels, &probs, &likelihoods);
+ if(isOk)
+ {
+ likelihoods = cv::sum(likelihoods).val[0];
+ set_mat_hdrs();
+ }
+
+ return isOk;
+}
+
+float
+CvEM::predict( const Mat& _sample, Mat* _probs, bool isNormalize ) const
+{
+ int cls = emObj.predict(_sample, _probs);
+ if(_probs && isNormalize)
+ cv::normalize(*_probs, *_probs, 1, 0, cv::NORM_L1);
+
+ return (float)cls;
+}
+
+int CvEM::getNClusters() const
+{
+ return emObj.getNClusters();
+}
+
+const Mat& CvEM::getMeans() const
+{
+ return emObj.getMeans();
+}
+
+void CvEM::getCovs(vector<Mat>& _covs) const
+{
+ _covs = emObj.getCovs();
+}
+
+const Mat& CvEM::getWeights() const
+{
+ return emObj.getWeights();
+}
+
+const Mat& CvEM::getProbs() const
+{
+ return probs;
+}
+
+
+/* End of file. */
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// Intel License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+using namespace std;
+using namespace cv;
+
+static
+void defaultDistribs( Mat& means, vector<Mat>& covs )
+{
+ float mp0[] = {0.0f, 0.0f}, cp0[] = {0.67f, 0.0f, 0.0f, 0.67f};
+ float mp1[] = {5.0f, 0.0f}, cp1[] = {1.0f, 0.0f, 0.0f, 1.0f};
+ float mp2[] = {1.0f, 5.0f}, cp2[] = {1.0f, 0.0f, 0.0f, 1.0f};
+ means.create(3, 2, CV_32FC1);
+ Mat m0( 1, 2, CV_32FC1, mp0 ), c0( 2, 2, CV_32FC1, cp0 );
+ Mat m1( 1, 2, CV_32FC1, mp1 ), c1( 2, 2, CV_32FC1, cp1 );
+ Mat m2( 1, 2, CV_32FC1, mp2 ), c2( 2, 2, CV_32FC1, cp2 );
+ means.resize(3), covs.resize(3);
+
+ Mat mr0 = means.row(0);
+ m0.copyTo(mr0);
+ c0.copyTo(covs[0]);
+
+ Mat mr1 = means.row(1);
+ m1.copyTo(mr1);
+ c1.copyTo(covs[1]);
+
+ Mat mr2 = means.row(2);
+ m2.copyTo(mr2);
+ c2.copyTo(covs[2]);
+}
+
+// generate points sets by normal distributions
+static
+void generateData( Mat& data, Mat& labels, const vector<int>& sizes, const Mat& _means, const vector<Mat>& covs, int labelType )
+{
+ vector<int>::const_iterator sit = sizes.begin();
+ int total = 0;
+ for( ; sit != sizes.end(); ++sit )
+ total += *sit;
+ assert( _means.rows == (int)sizes.size() && covs.size() == sizes.size() );
+ assert( !data.empty() && data.rows == total );
+ assert( data.type() == CV_32FC1 );
+
+ labels.create( data.rows, 1, labelType );
+
+ randn( data, Scalar::all(0.0), Scalar::all(1.0) );
+ vector<Mat> means(sizes.size());
+ for(int i = 0; i < _means.rows; i++)
+ means[i] = _means.row(i);
+ vector<Mat>::const_iterator mit = means.begin(), cit = covs.begin();
+ int bi, ei = 0;
+ sit = sizes.begin();
+ for( int p = 0, l = 0; sit != sizes.end(); ++sit, ++mit, ++cit, l++ )
+ {
+ bi = ei;
+ ei = bi + *sit;
+ assert( mit->rows == 1 && mit->cols == data.cols );
+ assert( cit->rows == data.cols && cit->cols == data.cols );
+ for( int i = bi; i < ei; i++, p++ )
+ {
+ Mat r(1, data.cols, CV_32FC1, data.ptr<float>(i));
+ r = r * (*cit) + *mit;
+ if( labelType == CV_32FC1 )
+ labels.at<float>(p, 0) = (float)l;
+ else if( labelType == CV_32SC1 )
+ labels.at<int>(p, 0) = l;
+ else
+ CV_DbgAssert(0);
+ }
+ }
+}
+
+static
+int maxIdx( const vector<int>& count )
+{
+ int idx = -1;
+ int maxVal = -1;
+ vector<int>::const_iterator it = count.begin();
+ for( int i = 0; it != count.end(); ++it, i++ )
+ {
+ if( *it > maxVal)
+ {
+ maxVal = *it;
+ idx = i;
+ }
+ }
+ assert( idx >= 0);
+ return idx;
+}
+
+static
+bool getLabelsMap( const Mat& labels, const vector<int>& sizes, vector<int>& labelsMap )
+{
+ size_t total = 0, nclusters = sizes.size();
+ for(size_t i = 0; i < sizes.size(); i++)
+ total += sizes[i];
+
+ assert( !labels.empty() );
+ assert( labels.total() == total && (labels.cols == 1 || labels.rows == 1));
+ assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 );
+
+ bool isFlt = labels.type() == CV_32FC1;
+
+ labelsMap.resize(nclusters);
+
+ vector<bool> buzy(nclusters, false);
+ int startIndex = 0;
+ for( size_t clusterIndex = 0; clusterIndex < sizes.size(); clusterIndex++ )
+ {
+ vector<int> count( nclusters, 0 );
+ for( int i = startIndex; i < startIndex + sizes[clusterIndex]; i++)
+ {
+ int lbl = isFlt ? (int)labels.at<float>(i) : labels.at<int>(i);
+ CV_Assert(lbl < (int)nclusters);
+ count[lbl]++;
+ CV_Assert(count[lbl] < (int)total);
+ }
+ startIndex += sizes[clusterIndex];
+
+ int cls = maxIdx( count );
+ CV_Assert( !buzy[cls] );
+
+ labelsMap[clusterIndex] = cls;
+
+ buzy[cls] = true;
+ }
+ for(size_t i = 0; i < buzy.size(); i++)
+ if(!buzy[i])
+ return false;
+
+ return true;
+}
+
+static
+bool calcErr( const Mat& labels, const Mat& origLabels, const vector<int>& sizes, float& err, bool labelsEquivalent = true )
+{
+ err = 0;
+ CV_Assert( !labels.empty() && !origLabels.empty() );
+ CV_Assert( labels.rows == 1 || labels.cols == 1 );
+ CV_Assert( origLabels.rows == 1 || origLabels.cols == 1 );
+ CV_Assert( labels.total() == origLabels.total() );
+ CV_Assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 );
+ CV_Assert( origLabels.type() == labels.type() );
+
+ vector<int> labelsMap;
+ bool isFlt = labels.type() == CV_32FC1;
+ if( !labelsEquivalent )
+ {
+ if( !getLabelsMap( labels, sizes, labelsMap ) )
+ return false;
+
+ for( int i = 0; i < labels.rows; i++ )
+ if( isFlt )
+ err += labels.at<float>(i) != labelsMap[(int)origLabels.at<float>(i)] ? 1.f : 0.f;
+ else
+ err += labels.at<int>(i) != labelsMap[origLabels.at<int>(i)] ? 1.f : 0.f;
+ }
+ else
+ {
+ for( int i = 0; i < labels.rows; i++ )
+ if( isFlt )
+ err += labels.at<float>(i) != origLabels.at<float>(i) ? 1.f : 0.f;
+ else
+ err += labels.at<int>(i) != origLabels.at<int>(i) ? 1.f : 0.f;
+ }
+ err /= (float)labels.rows;
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+class CV_CvEMTest : public cvtest::BaseTest
+{
+public:
+ CV_CvEMTest() {}
+protected:
+ virtual void run( int start_from );
+ int runCase( int caseIndex, const CvEMParams& params,
+ const cv::Mat& trainData, const cv::Mat& trainLabels,
+ const cv::Mat& testData, const cv::Mat& testLabels,
+ const vector<int>& sizes);
+};
+
+int CV_CvEMTest::runCase( int caseIndex, const CvEMParams& params,
+ const cv::Mat& trainData, const cv::Mat& trainLabels,
+ const cv::Mat& testData, const cv::Mat& testLabels,
+ const vector<int>& sizes )
+{
+ int code = cvtest::TS::OK;
+
+ cv::Mat labels;
+ float err;
+
+ CvEM em;
+ em.train( trainData, Mat(), params, &labels );
+
+ // check train error
+ if( !calcErr( labels, trainLabels, sizes, err , false ) )
+ {
+ ts->printf( cvtest::TS::LOG, "Case index %i : Bad output labels.\n", caseIndex );
+ code = cvtest::TS::FAIL_INVALID_OUTPUT;
+ }
+ else if( err > 0.006f )
+ {
+ ts->printf( cvtest::TS::LOG, "Case index %i : Bad accuracy (%f) on train data.\n", caseIndex, err );
+ code = cvtest::TS::FAIL_BAD_ACCURACY;
+ }
+
+ // check test error
+ labels.create( testData.rows, 1, CV_32SC1 );
+ for( int i = 0; i < testData.rows; i++ )
+ {
+ Mat sample = testData.row(i);
+ labels.at<int>(i,0) = (int)em.predict( sample, 0 );
+ }
+ if( !calcErr( labels, testLabels, sizes, err, false ) )
+ {
+ ts->printf( cvtest::TS::LOG, "Case index %i : Bad output labels.\n", caseIndex );
+ code = cvtest::TS::FAIL_INVALID_OUTPUT;
+ }
+ else if( err > 0.006f )
+ {
+ ts->printf( cvtest::TS::LOG, "Case index %i : Bad accuracy (%f) on test data.\n", caseIndex, err );
+ code = cvtest::TS::FAIL_BAD_ACCURACY;
+ }
+
+ return code;
+}
+
+void CV_CvEMTest::run( int /*start_from*/ )
+{
+ int sizesArr[] = { 500, 700, 800 };
+ int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2];
+
+ // Points distribution
+ Mat means;
+ vector<Mat> covs;
+ defaultDistribs( means, covs );
+
+ // train data
+ Mat trainData( pointsCount, 2, CV_32FC1 ), trainLabels;
+ vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
+ generateData( trainData, trainLabels, sizes, means, covs, CV_32SC1 );
+
+ // test data
+ Mat testData( pointsCount, 2, CV_32FC1 ), testLabels;
+ generateData( testData, testLabels, sizes, means, covs, CV_32SC1 );
+
+ CvEMParams params;
+ params.nclusters = 3;
+ Mat probs(trainData.rows, params.nclusters, CV_32FC1, cv::Scalar(1));
+ CvMat probsHdr = probs;
+ params.probs = &probsHdr;
+ Mat weights(1, params.nclusters, CV_32FC1, cv::Scalar(1));
+ CvMat weightsHdr = weights;
+ params.weights = &weightsHdr;
+ CvMat meansHdr = means;
+ params.means = &meansHdr;
+ std::vector<CvMat> covsHdrs(params.nclusters);
+ std::vector<const CvMat*> covsPtrs(params.nclusters);
+ for(int i = 0; i < params.nclusters; i++)
+ {
+ covsHdrs[i] = covs[i];
+ covsPtrs[i] = &covsHdrs[i];
+ }
+ params.covs = &covsPtrs[0];
+
+ int code = cvtest::TS::OK;
+ int caseIndex = 0;
+ {
+ params.start_step = cv::EM::START_AUTO_STEP;
+ params.cov_mat_type = cv::EM::COV_MAT_GENERIC;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.start_step = cv::EM::START_AUTO_STEP;
+ params.cov_mat_type = cv::EM::COV_MAT_DIAGONAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.start_step = cv::EM::START_AUTO_STEP;
+ params.cov_mat_type = cv::EM::COV_MAT_SPHERICAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.start_step = cv::EM::START_M_STEP;
+ params.cov_mat_type = cv::EM::COV_MAT_GENERIC;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.start_step = cv::EM::START_M_STEP;
+ params.cov_mat_type = cv::EM::COV_MAT_DIAGONAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.start_step = cv::EM::START_M_STEP;
+ params.cov_mat_type = cv::EM::COV_MAT_SPHERICAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.start_step = cv::EM::START_E_STEP;
+ params.cov_mat_type = cv::EM::COV_MAT_GENERIC;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.start_step = cv::EM::START_E_STEP;
+ params.cov_mat_type = cv::EM::COV_MAT_DIAGONAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.start_step = cv::EM::START_E_STEP;
+ params.cov_mat_type = cv::EM::COV_MAT_SPHERICAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+
+ ts->set_failed_test_info( code );
+}
+
+class CV_CvEMTest_SaveLoad : public cvtest::BaseTest {
+public:
+ CV_CvEMTest_SaveLoad() {}
+protected:
+ virtual void run( int /*start_from*/ )
+ {
+ int code = cvtest::TS::OK;
+ cv::EM em;
+
+ Mat samples = Mat(3,1,CV_32F);
+ samples.at<float>(0,0) = 1;
+ samples.at<float>(1,0) = 2;
+ samples.at<float>(2,0) = 3;
+
+ cv::EM::Params params;
+ params.nclusters = 2;
+
+ Mat labels;
+
+ em.train(samples, Mat(), params, &labels);
+
+ Mat firstResult(samples.rows, 1, CV_32FC1);
+ for( int i = 0; i < samples.rows; i++)
+ firstResult.at<float>(i) = em.predict( samples.row(i) );
+
+ // Write out
+
+ string filename = tempfile() + ".xml";
+ {
+ FileStorage fs = FileStorage(filename, FileStorage::WRITE);
+ try
+ {
+ fs << "em" << "{";
+ em.write(fs);
+ fs << "}";
+ }
+ catch(...)
+ {
+ ts->printf( cvtest::TS::LOG, "Crash in write method.\n" );
+ ts->set_failed_test_info( cvtest::TS::FAIL_EXCEPTION );
+ }
+ }
+
+ em.clear();
+
+ // Read in
+ {
+ FileStorage fs = FileStorage(filename, FileStorage::READ);
+ CV_Assert(fs.isOpened());
+ FileNode fn = fs["em"];
+ try
+ {
+ em.read(fn);
+ }
+ catch(...)
+ {
+ ts->printf( cvtest::TS::LOG, "Crash in read method.\n" );
+ ts->set_failed_test_info( cvtest::TS::FAIL_EXCEPTION );
+ }
+ }
+
+ remove( filename.c_str() );
+
+ int errCaseCount = 0;
+ for( int i = 0; i < samples.rows; i++)
+ errCaseCount = std::abs(em.predict(samples.row(i)) - firstResult.at<float>(i)) < FLT_EPSILON ? 0 : 1;
+
+ if( errCaseCount > 0 )
+ {
+ ts->printf( cvtest::TS::LOG, "Different prediction results before writeing and after reading (errCaseCount=%d).\n", errCaseCount );
+ code = cvtest::TS::FAIL_BAD_ACCURACY;
+ }
+
+ ts->set_failed_test_info( code );
+ }
+};
+
+TEST(ML_CvEM, accuracy) { CV_CvEMTest test; test.safe_run(); }
+TEST(ML_CvEM, save_load) { CV_CvEMTest_SaveLoad test; test.safe_run(); }
#ifdef __cplusplus
+#include <map>
+#include <string>
+#include <iostream>
+
// Apple defines a check() macro somewhere in the debug headers
// that interferes with a method definiton in this header
#undef check
/****************************************************************************************\
* Expectation - Maximization *
\****************************************************************************************/
-
-struct CV_EXPORTS_W_MAP CvEMParams
+namespace cv
{
- CvEMParams();
- CvEMParams( int nclusters, int cov_mat_type=1/*CvEM::COV_MAT_DIAGONAL*/,
- int start_step=0/*CvEM::START_AUTO_STEP*/,
- CvTermCriteria term_crit=cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 100, FLT_EPSILON),
- const CvMat* probs=0, const CvMat* weights=0, const CvMat* means=0, const CvMat** covs=0 );
-
- CV_PROP_RW int nclusters;
- CV_PROP_RW int cov_mat_type;
- CV_PROP_RW int start_step;
- const CvMat* probs;
- const CvMat* weights;
- const CvMat* means;
- const CvMat** covs;
- CV_PROP_RW CvTermCriteria term_crit;
-};
-
-
-class CV_EXPORTS_W CvEM : public CvStatModel
+class CV_EXPORTS_W EM : public Algorithm
{
public:
// Type of covariation matrices
- enum { COV_MAT_SPHERICAL=0, COV_MAT_DIAGONAL=1, COV_MAT_GENERIC=2 };
+ enum {COV_MAT_SPHERICAL=0, COV_MAT_DIAGONAL=1, COV_MAT_GENERIC=2};
// The initial step
- enum { START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0 };
+ enum {START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0};
- CV_WRAP CvEM();
- CvEM( const CvMat* samples, const CvMat* sampleIdx=0,
- CvEMParams params=CvEMParams(), CvMat* labels=0 );
- //CvEM (CvEMParams params, CvMat * means, CvMat ** covs, CvMat * weights,
- // CvMat * probs, CvMat * log_weight_div_det, CvMat * inv_eigen_values, CvMat** cov_rotate_mats);
+ class CV_EXPORTS_W Params
+ {
+ public:
+ Params(int nclusters=10, int covMatType=EM::COV_MAT_DIAGONAL, int startStep=EM::START_AUTO_STEP,
+ const cv::TermCriteria& termCrit=cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS, 100, FLT_EPSILON),
+ const cv::Mat* probs=0, const cv::Mat* weights=0,
+ const cv::Mat* means=0, const std::vector<cv::Mat>* covs=0);
+
+ int nclusters;
+ int covMatType;
+ int startStep;
+
+ // all 4 following matrices should have type CV_32FC1
+ const cv::Mat* probs;
+ const cv::Mat* weights;
+ const cv::Mat* means;
+ const std::vector<cv::Mat>* covs;
+
+ cv::TermCriteria termCrit;
+ };
- virtual ~CvEM();
+ EM();
+ EM(const cv::Mat& samples, const cv::Mat samplesMask=cv::Mat(),
+ const EM::Params& params=EM::Params(), cv::Mat* labels=0, cv::Mat* probs=0, cv::Mat* likelihoods=0);
+ virtual ~EM();
+ virtual void clear();
- virtual bool train( const CvMat* samples, const CvMat* sampleIdx=0,
- CvEMParams params=CvEMParams(), CvMat* labels=0 );
+ virtual bool train(const cv::Mat& samples, const cv::Mat& samplesMask=cv::Mat(),
+ const EM::Params& params=EM::Params(), cv::Mat* labels=0, cv::Mat* probs=0, cv::Mat* likelihoods=0);
+ int predict(const cv::Mat& sample, cv::Mat* probs=0, double* likelihood=0) const;
- virtual float predict( const CvMat* sample, CV_OUT CvMat* probs ) const;
+ bool isTrained() const;
+ int getNClusters() const;
+ int getCovMatType() const;
-#ifndef SWIG
- CV_WRAP CvEM( const cv::Mat& samples, const cv::Mat& sampleIdx=cv::Mat(),
- CvEMParams params=CvEMParams() );
-
- CV_WRAP virtual bool train( const cv::Mat& samples,
- const cv::Mat& sampleIdx=cv::Mat(),
- CvEMParams params=CvEMParams(),
- CV_OUT cv::Mat* labels=0 );
-
- CV_WRAP virtual float predict( const cv::Mat& sample, CV_OUT cv::Mat* probs=0 ) const;
- CV_WRAP virtual double calcLikelihood( const cv::Mat &sample ) const;
-
- CV_WRAP int getNClusters() const;
- CV_WRAP cv::Mat getMeans() const;
- CV_WRAP void getCovs(CV_OUT std::vector<cv::Mat>& covs) const;
- CV_WRAP cv::Mat getWeights() const;
- CV_WRAP cv::Mat getProbs() const;
-
- CV_WRAP inline double getLikelihood() const { return log_likelihood; }
- CV_WRAP inline double getLikelihoodDelta() const { return log_likelihood_delta; }
-#endif
-
- CV_WRAP virtual void clear();
+ const cv::Mat& getWeights() const;
+ const cv::Mat& getMeans() const;
+ const std::vector<cv::Mat>& getCovs() const;
- int get_nclusters() const;
- const CvMat* get_means() const;
- const CvMat** get_covs() const;
- const CvMat* get_weights() const;
- const CvMat* get_probs() const;
+ AlgorithmInfo* info() const;
+ virtual void read(const FileNode& fn);
- inline double get_log_likelihood() const { return log_likelihood; }
- inline double get_log_likelihood_delta() const { return log_likelihood_delta; }
-
-// inline const CvMat * get_log_weight_div_det () const { return log_weight_div_det; };
-// inline const CvMat * get_inv_eigen_values () const { return inv_eigen_values; };
-// inline const CvMat ** get_cov_rotate_mats () const { return cov_rotate_mats; };
+protected:
+ virtual void setTrainData(const cv::Mat& samples, const cv::Mat& samplesMask, const EM::Params& params);
- virtual void read( CvFileStorage* fs, CvFileNode* node );
- virtual void write( CvFileStorage* fs, const char* name ) const;
+ bool doTrain(const cv::TermCriteria& termCrit);
+ virtual void eStep();
+ virtual void mStep();
- virtual void write_params( CvFileStorage* fs ) const;
- virtual void read_params( CvFileStorage* fs, CvFileNode* node );
+ void clusterTrainSamples();
+ void decomposeCovs();
+ void computeLogWeightDivDet();
-protected:
+ void computeProbabilities(const cv::Mat& sample, int& label, cv::Mat* probs, float* likelihood) const;
- virtual void set_params( const CvEMParams& params,
- const CvVectors& train_data );
- virtual void init_em( const CvVectors& train_data );
- virtual double run_em( const CvVectors& train_data );
- virtual void init_auto( const CvVectors& samples );
- virtual void kmeans( const CvVectors& train_data, int nclusters,
- CvMat* labels, CvTermCriteria criteria,
- const CvMat* means );
- CvEMParams params;
- double log_likelihood;
- double log_likelihood_delta;
-
- CvMat* means;
- CvMat** covs;
- CvMat* weights;
- CvMat* probs;
+ // all inner matrices have type CV_32FC1
+ int nclusters;
+ int covMatType;
+ int startStep;
- CvMat* log_weight_div_det;
- CvMat* inv_eigen_values;
- CvMat** cov_rotate_mats;
+ cv::Mat trainSamples;
+ cv::Mat trainProbs;
+ cv::Mat trainLikelihoods;
+ cv::Mat trainLabels;
+ cv::Mat trainCounts;
+
+ cv::Mat weights;
+ cv::Mat means;
+ std::vector<cv::Mat> covs;
+
+ std::vector<cv::Mat> covsEigenValues;
+ std::vector<cv::Mat> covsRotateMats;
+ std::vector<cv::Mat> invCovsEigenValues;
+ cv::Mat logWeightDivDet;
};
+} // namespace cv
/****************************************************************************************\
* Decision Tree *
CvMat** responses,
int num_classes, ... );
-
-#endif
-
/****************************************************************************************\
* Data *
\****************************************************************************************/
-#include <map>
-#include <string>
-#include <iostream>
-
#define CV_COUNT 0
#define CV_PORTION 1
typedef CvSVMKernel SVMKernel;
typedef CvSVMSolver SVMSolver;
typedef CvSVM SVM;
-typedef CvEMParams EMParams;
-typedef CvEM ExpectationMaximization;
typedef CvDTreeParams DTreeParams;
typedef CvMLData TrainData;
typedef CvDTree DecisionTree;
}
-#endif
+#endif // __cplusplus
+#endif // __OPENCV_ML_HPP__
+
/* End of file. */
#include "precomp.hpp"
-
-/*
- CvEM:
- * params.nclusters - number of clusters to cluster samples to.
- * means - calculated by the EM algorithm set of gaussians' means.
- * log_weight_div_det - auxilary vector that k-th component is equal to
- (-2)*ln(weights_k/det(Sigma_k)^0.5),
- where <weights_k> is the weight,
- <Sigma_k> is the covariation matrice of k-th cluster.
- * inv_eigen_values - set of 1*dims matrices, <inv_eigen_values>[k] contains
- inversed eigen values of covariation matrice of the k-th cluster.
- In the case of <cov_mat_type> == COV_MAT_DIAGONAL,
- inv_eigen_values[k] = Sigma_k^(-1).
- * covs_rotate_mats - used only if cov_mat_type == COV_MAT_GENERIC, in all the
- other cases it is NULL. <covs_rotate_mats>[k] is the orthogonal
- matrice, obtained by the SVD-decomposition of Sigma_k.
- Both <inv_eigen_values> and <covs_rotate_mats> fields are used for representation of
- covariation matrices and simplifying EM calculations.
- For fixed k denote
- u = covs_rotate_mats[k],
- v = inv_eigen_values[k],
- w = v^(-1);
- if <cov_mat_type> == COV_MAT_GENERIC, then Sigma_k = u w u',
- else Sigma_k = w.
- Symbol ' means transposition.
- */
-
-CvEMParams::CvEMParams() : nclusters(10), cov_mat_type(1/*CvEM::COV_MAT_DIAGONAL*/),
- start_step(0/*CvEM::START_AUTO_STEP*/), probs(0), weights(0), means(0), covs(0)
+namespace cv
{
- term_crit=cvTermCriteria( CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 100, FLT_EPSILON );
-}
-CvEMParams::CvEMParams( int _nclusters, int _cov_mat_type, int _start_step,
- CvTermCriteria _term_crit, const CvMat* _probs,
- const CvMat* _weights, const CvMat* _means, const CvMat** _covs ) :
- nclusters(_nclusters), cov_mat_type(_cov_mat_type), start_step(_start_step),
- probs(_probs), weights(_weights), means(_means), covs(_covs), term_crit(_term_crit)
+const float minEigenValue = 1.e-3;
+
+EM::Params::Params( int nclusters, int covMatType, int startStep, const cv::TermCriteria& termCrit,
+ const cv::Mat* probs, const cv::Mat* weights,
+ const cv::Mat* means, const std::vector<cv::Mat>* covs )
+ : nclusters(nclusters), covMatType(covMatType), startStep(startStep),
+ probs(probs), weights(weights), means(means), covs(covs), termCrit(termCrit)
{}
-CvEM::CvEM()
-{
- means = weights = probs = inv_eigen_values = log_weight_div_det = 0;
- covs = cov_rotate_mats = 0;
-}
+///////////////////////////////////////////////////////////////////////////////////////////////////////
-CvEM::CvEM( const CvMat* samples, const CvMat* sample_idx,
- CvEMParams params, CvMat* labels )
-{
- means = weights = probs = inv_eigen_values = log_weight_div_det = 0;
- covs = cov_rotate_mats = 0;
+EM::EM()
+{}
- // just invoke the train() method
- train(samples, sample_idx, params, labels);
+EM::EM(const cv::Mat& samples, const cv::Mat samplesMask,
+ const EM::Params& params, cv::Mat* labels, cv::Mat* probs, cv::Mat* likelihoods)
+{
+ train(samples, samplesMask, params, labels, probs, likelihoods);
}
-CvEM::~CvEM()
+EM::~EM()
{
clear();
}
-
-void CvEM::clear()
+void EM::clear()
{
- int i;
+ trainSamples.release();
+ trainProbs.release();
+ trainLikelihoods.release();
+ trainLabels.release();
+ trainCounts.release();
- cvReleaseMat( &means );
- cvReleaseMat( &weights );
- cvReleaseMat( &probs );
- cvReleaseMat( &inv_eigen_values );
- cvReleaseMat( &log_weight_div_det );
+ weights.release();
+ means.release();
+ covs.clear();
- if( covs || cov_rotate_mats )
- {
- for( i = 0; i < params.nclusters; i++ )
- {
- if( covs )
- cvReleaseMat( &covs[i] );
- if( cov_rotate_mats )
- cvReleaseMat( &cov_rotate_mats[i] );
- }
- cvFree( &covs );
- cvFree( &cov_rotate_mats );
- }
+ covsEigenValues.clear();
+ invCovsEigenValues.clear();
+ covsRotateMats.clear();
+
+ logWeightDivDet.release();
}
-void CvEM::read( CvFileStorage* fs, CvFileNode* node )
+bool EM::train(const cv::Mat& samples, const cv::Mat& samplesMask,
+ const EM::Params& params, cv::Mat* labels, cv::Mat* probs, cv::Mat* likelihoods)
{
- bool ok = false;
- CV_FUNCNAME( "CvEM::read" );
+ setTrainData(samples, samplesMask, params);
- __BEGIN__;
-
- clear();
-
- size_t data_size;
- CvSeqReader reader;
- CvFileNode* em_node = 0;
- CvFileNode* tmp_node = 0;
- CvSeq* seq = 0;
-
- read_params( fs, node );
-
- em_node = cvGetFileNodeByName( fs, node, "cvEM" );
- if( !em_node )
- CV_ERROR( CV_StsBadArg, "cvEM tag not found" );
-
- CV_CALL( weights = (CvMat*)cvReadByName( fs, em_node, "weights" ));
- CV_CALL( means = (CvMat*)cvReadByName( fs, em_node, "means" ));
- CV_CALL( log_weight_div_det = (CvMat*)cvReadByName( fs, em_node, "log_weight_div_det" ));
- CV_CALL( inv_eigen_values = (CvMat*)cvReadByName( fs, em_node, "inv_eigen_values" ));
-
- // Size of all the following data
- data_size = params.nclusters*sizeof(CvMat*);
-
- CV_CALL( covs = (CvMat**)cvAlloc( data_size ));
- memset( covs, 0, data_size );
- CV_CALL( tmp_node = cvGetFileNodeByName( fs, em_node, "covs" ));
- seq = tmp_node->data.seq;
- if( !CV_NODE_IS_SEQ(tmp_node->tag) || seq->total != params.nclusters)
- CV_ERROR( CV_StsParseError, "Missing or invalid sequence of covariance matrices" );
- CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
- for( int i = 0; i < params.nclusters; i++ )
- {
- CV_CALL( covs[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
- CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
- }
+ bool isOk = doTrain(params.termCrit);
- CV_CALL( cov_rotate_mats = (CvMat**)cvAlloc( data_size ));
- memset( cov_rotate_mats, 0, data_size );
- CV_CALL( tmp_node = cvGetFileNodeByName( fs, em_node, "cov_rotate_mats" ));
- seq = tmp_node->data.seq;
- if( !CV_NODE_IS_SEQ(tmp_node->tag) || seq->total != params.nclusters)
- CV_ERROR( CV_StsParseError, "Missing or invalid sequence of covariance matrices" );
- CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
- for( int i = 0; i < params.nclusters; i++ )
+ if(isOk)
{
- CV_CALL( cov_rotate_mats[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
- CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
+ if(labels)
+ cv::swap(*labels, trainLabels);
+ if(probs)
+ cv::swap(*probs, trainProbs);
+ if(likelihoods)
+ cv::swap(*likelihoods, trainLikelihoods);
+
+ trainSamples.release();
+ trainProbs.release();
+ trainLabels.release();
+ trainLikelihoods.release();
+ trainCounts.release();
}
-
- ok = true;
- __END__;
-
- if (!ok)
+ else
clear();
+
+ return isOk;
}
-void CvEM::read_params( CvFileStorage *fs, CvFileNode *node)
+int EM::predict(const cv::Mat& sample, cv::Mat* _probs, double* _likelihood) const
{
- CV_FUNCNAME( "CvEM::read_params");
-
- __BEGIN__;
-
- size_t data_size;
- CvEMParams _params;
- CvSeqReader reader;
- CvFileNode* param_node = 0;
- CvFileNode* tmp_node = 0;
- CvSeq* seq = 0;
-
- const char * start_step_name = 0;
- const char * cov_mat_type_name = 0;
-
- param_node = cvGetFileNodeByName( fs, node, "params" );
- if( !param_node )
- CV_ERROR( CV_StsBadArg, "params tag not found" );
+ CV_Assert(isTrained());
- CV_CALL( start_step_name = cvReadStringByName( fs, param_node, "start_step", 0 ) );
- CV_CALL( cov_mat_type_name = cvReadStringByName( fs, param_node, "cov_mat_type", 0 ) );
+ CV_Assert(!sample.empty());
+ CV_Assert(sample.type() == CV_32FC1);
- if( start_step_name )
- _params.start_step = strcmp( start_step_name, "START_E_STEP" ) == 0 ? START_E_STEP :
- strcmp( start_step_name, "START_M_STEP" ) == 0 ? START_M_STEP :
- strcmp( start_step_name, "START_AUTO_STEP" ) == 0 ? START_AUTO_STEP : 0;
- else
- CV_CALL( _params.start_step = cvReadIntByName( fs, param_node, "start_step", -1 ) );
+ int label;
+ float likelihood;
+ computeProbabilities(sample, label, _probs, _likelihood ? &likelihood : 0);
+ if(_likelihood)
+ *_likelihood = static_cast<double>(likelihood);
+ return label;
+}
- if( cov_mat_type_name )
- _params.cov_mat_type = strcmp( cov_mat_type_name, "COV_MAT_SPHERICAL" ) == 0 ? COV_MAT_SPHERICAL :
- strcmp( cov_mat_type_name, "COV_MAT_DIAGONAL" ) == 0 ? COV_MAT_DIAGONAL :
- strcmp( cov_mat_type_name, "COV_MAT_GENERIC" ) == 0 ? COV_MAT_GENERIC : 0;
- else
- CV_CALL( _params.cov_mat_type = cvReadIntByName( fs, param_node, "cov_mat_type", -1) );
-
- CV_CALL( _params.nclusters = cvReadIntByName( fs, param_node, "nclusters", -1 ));
- CV_CALL( _params.weights = (CvMat*)cvReadByName( fs, param_node, "weights" ));
- CV_CALL( _params.means = (CvMat*)cvReadByName( fs, param_node, "means" ));
-
- data_size = _params.nclusters*sizeof(CvMat*);
- CV_CALL( _params.covs = (const CvMat**)cvAlloc( data_size ));
- memset( _params.covs, 0, data_size );
- CV_CALL( tmp_node = cvGetFileNodeByName( fs, param_node, "covs" ));
- seq = tmp_node->data.seq;
- if( !CV_NODE_IS_SEQ(tmp_node->tag) || seq->total != _params.nclusters)
- CV_ERROR( CV_StsParseError, "Missing or invalid sequence of covariance matrices" );
- CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
- for( int i = 0; i < _params.nclusters; i++ )
- {
- CV_CALL( _params.covs[i] = (CvMat*)cvRead( fs, (CvFileNode*)reader.ptr ));
- CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
- }
- params = _params;
+bool EM::isTrained() const
+{
+ return !means.empty();
+}
- __END__;
+int EM::getNClusters() const
+{
+ return isTrained() ? nclusters : -1;
}
-void CvEM::write_params( CvFileStorage* fs ) const
+int EM::getCovMatType() const
{
- CV_FUNCNAME( "CvEM::write_params" );
+ return isTrained() ? covMatType : -1;
+}
- __BEGIN__;
+const cv::Mat& EM::getWeights() const
+{
+ CV_Assert((isTrained() && !weights.empty()) || (!isTrained() && weights.empty()));
+ return weights;
+}
- const char* cov_mat_type_name =
- (params.cov_mat_type == COV_MAT_SPHERICAL) ? "COV_MAT_SPHERICAL" :
- (params.cov_mat_type == COV_MAT_DIAGONAL) ? "COV_MAT_DIAGONAL" :
- (params.cov_mat_type == COV_MAT_GENERIC) ? "COV_MAT_GENERIC" : 0;
+const cv::Mat& EM::getMeans() const
+{
+ CV_Assert((isTrained() && !means.empty()) || (!isTrained() && means.empty()));
+ return means;
+}
- const char* start_step_name =
- (params.start_step == START_E_STEP) ? "START_E_STEP" :
- (params.start_step == START_M_STEP) ? "START_M_STEP" :
- (params.start_step == START_AUTO_STEP) ? "START_AUTO_STEP" : 0;
+const std::vector<cv::Mat>& EM::getCovs() const
+{
+ CV_Assert((isTrained() && !covs.empty()) || (!isTrained() && covs.empty()));
+ return covs;
+}
- CV_CALL( cvStartWriteStruct( fs, "params", CV_NODE_MAP ) );
+static
+void checkTrainData(const cv::Mat& samples, const cv::Mat& samplesMask, const EM::Params& params)
+{
+ // Check samples.
+ CV_Assert(!samples.empty());
+ CV_Assert(samples.type() == CV_32FC1);
+
+ int nsamples = samples.rows;
+ int dim = samples.cols;
+
+ // Check samples indices.
+ CV_Assert(samplesMask.empty() ||
+ ((samplesMask.rows == 1 || samplesMask.cols == 1) &&
+ static_cast<int>(samplesMask.total()) == nsamples && samplesMask.type() == CV_8UC1));
+
+ // Check training params.
+ CV_Assert(params.nclusters > 0);
+ CV_Assert(params.nclusters <= nsamples);
+ CV_Assert(params.startStep == EM::START_AUTO_STEP || params.startStep == EM::START_E_STEP || params.startStep == EM::START_M_STEP);
+
+ CV_Assert(!params.probs ||
+ (!params.probs->empty() &&
+ params.probs->rows == nsamples && params.probs->cols == params.nclusters &&
+ params.probs->type() == CV_32FC1));
+
+ CV_Assert(!params.weights ||
+ (!params.weights->empty() &&
+ (params.weights->cols == 1 || params.weights->rows == 1) && static_cast<int>(params.weights->total()) == params.nclusters &&
+ params.weights->type() == CV_32FC1));
+
+ CV_Assert(!params.means ||
+ (!params.means->empty() &&
+ params.means->rows == params.nclusters && params.means->cols == dim &&
+ params.means->type() == CV_32FC1));
+
+ CV_Assert(!params.covs ||
+ (!params.covs->empty() &&
+ static_cast<int>(params.covs->size()) == params.nclusters));
+ if(params.covs)
+ {
+ const cv::Size covSize(dim, dim);
+ for(size_t i = 0; i < params.covs->size(); i++)
+ {
+ const cv::Mat& m = (*params.covs)[i];
+ CV_Assert(!m.empty() && m.size() == covSize && (m.type() == CV_32FC1));
+ }
+ }
- if( cov_mat_type_name )
+ if(params.startStep == EM::START_E_STEP)
{
- CV_CALL( cvWriteString( fs, "cov_mat_type", cov_mat_type_name) );
+ CV_Assert(params.means);
}
- else
+ else if(params.startStep == EM::START_M_STEP)
{
- CV_CALL( cvWriteInt( fs, "cov_mat_type", params.cov_mat_type ) );
+ CV_Assert(params.probs);
}
+}
- if( start_step_name )
+static
+void preprocessSampleData(const cv::Mat& src, cv::Mat& dst, int dstType, const cv::Mat& samplesMask, bool isAlwaysClone)
+{
+ if(samplesMask.empty() || cv::countNonZero(samplesMask) == src.rows)
{
- CV_CALL( cvWriteString( fs, "start_step", start_step_name) );
+ if(src.type() == dstType && !isAlwaysClone)
+ dst = src;
+ else
+ src.convertTo(dst, dstType);
}
else
{
- CV_CALL( cvWriteInt( fs, "cov_mat_type", params.start_step ) );
+ dst.release();
+ for(int sampleIndex = 0; sampleIndex < src.rows; sampleIndex++)
+ {
+ if(samplesMask.at<uchar>(sampleIndex))
+ {
+ cv::Mat sample = src.row(sampleIndex);
+ cv::Mat sample_dbl;
+ sample.convertTo(sample_dbl, dstType);
+ dst.push_back(sample_dbl);
+ }
+ }
}
-
- CV_CALL( cvWriteInt( fs, "nclusters", params.nclusters ));
- CV_CALL( cvWrite( fs, "weights", weights ));
- CV_CALL( cvWrite( fs, "means", means ));
-
- CV_CALL( cvStartWriteStruct( fs, "covs", CV_NODE_SEQ ));
- for( int i = 0; i < params.nclusters; i++ )
- CV_CALL( cvWrite( fs, NULL, covs[i] ));
- CV_CALL( cvEndWriteStruct( fs ) );
-
- // Close params struct
- CV_CALL( cvEndWriteStruct( fs ) );
-
- __END__;
}
-void CvEM::write( CvFileStorage* fs, const char* name ) const
+static
+void preprocessProbability(cv::Mat& probs)
{
- CV_FUNCNAME( "CvEM::write" );
-
- __BEGIN__;
-
- CV_CALL( cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_EM ) );
+ cv::max(probs, 0., probs);
- write_params(fs);
-
- CV_CALL( cvStartWriteStruct( fs, "cvEM", CV_NODE_MAP ) );
-
- CV_CALL( cvWrite( fs, "means", means ) );
- CV_CALL( cvWrite( fs, "weights", weights ) );
- CV_CALL( cvWrite( fs, "log_weight_div_det", log_weight_div_det ) );
- CV_CALL( cvWrite( fs, "inv_eigen_values", inv_eigen_values ) );
-
- CV_CALL( cvStartWriteStruct( fs, "covs", CV_NODE_SEQ ));
- for( int i = 0; i < params.nclusters; i++ )
- CV_CALL( cvWrite( fs, NULL, covs[i] ));
- CV_CALL( cvEndWriteStruct( fs ));
-
- CV_CALL( cvStartWriteStruct( fs, "cov_rotate_mats", CV_NODE_SEQ ));
- for( int i = 0; i < params.nclusters; i++ )
- CV_CALL( cvWrite( fs, NULL, cov_rotate_mats[i] ));
- CV_CALL( cvEndWriteStruct( fs ) );
-
- // close cvEM
- CV_CALL( cvEndWriteStruct( fs ) );
-
- // close top level
- CV_CALL( cvEndWriteStruct( fs ) );
+ const float uniformProbability = 1./probs.cols;
+ for(int y = 0; y < probs.rows; y++)
+ {
+ cv::Mat sampleProbs = probs.row(y);
- __END__;
+ double maxVal = 0;
+ cv::minMaxLoc(sampleProbs, 0, &maxVal);
+ if(maxVal < FLT_EPSILON)
+ sampleProbs.setTo(uniformProbability);
+ else
+ cv::normalize(sampleProbs, sampleProbs, 1, 0, cv::NORM_L1);
+ }
}
-void CvEM::set_params( const CvEMParams& _params, const CvVectors& train_data )
+void EM::setTrainData(const cv::Mat& samples, const cv::Mat& samplesMask, const EM::Params& params)
{
- CV_FUNCNAME( "CvEM::set_params" );
+ clear();
- __BEGIN__;
+ checkTrainData(samples, samplesMask, params);
- int k;
+ // Set checked data
- params = _params;
- params.term_crit = cvCheckTermCriteria( params.term_crit, 1e-6, 10000 );
+ nclusters = params.nclusters;
+ covMatType = params.covMatType;
+ startStep = params.startStep;
- if( params.cov_mat_type != COV_MAT_SPHERICAL &&
- params.cov_mat_type != COV_MAT_DIAGONAL &&
- params.cov_mat_type != COV_MAT_GENERIC )
- CV_ERROR( CV_StsBadArg, "Unknown covariation matrix type" );
+ preprocessSampleData(samples, trainSamples, CV_32FC1, samplesMask, false);
- switch( params.start_step )
+ // set probs
+ if(params.probs && startStep == EM::START_M_STEP)
{
- case START_M_STEP:
- if( !params.probs )
- CV_ERROR( CV_StsNullPtr, "Probabilities must be specified when EM algorithm starts with M-step" );
- break;
- case START_E_STEP:
- if( !params.means )
- CV_ERROR( CV_StsNullPtr, "Mean's must be specified when EM algorithm starts with E-step" );
- break;
- case START_AUTO_STEP:
- break;
- default:
- CV_ERROR( CV_StsBadArg, "Unknown start_step" );
+ preprocessSampleData(*params.probs, trainProbs, CV_32FC1, samplesMask, true);
+ preprocessProbability(trainProbs);
}
- if( params.nclusters < 1 )
- CV_ERROR( CV_StsOutOfRange, "The number of clusters (mixtures) should be > 0" );
-
- if( params.probs )
+ // set weights
+ if(params.weights && (startStep == EM::START_E_STEP && params.covs))
{
- const CvMat* p = params.probs;
- if( !CV_IS_MAT(p) ||
- (CV_MAT_TYPE(p->type) != CV_32FC1 &&
- CV_MAT_TYPE(p->type) != CV_64FC1) ||
- p->rows != train_data.count ||
- p->cols != params.nclusters )
- CV_ERROR( CV_StsBadArg, "The array of probabilities must be a valid "
- "floating-point matrix (CvMat) of 'nsamples' x 'nclusters' size" );
+ params.weights->convertTo(weights, CV_32FC1);
+ weights.reshape(1,1);
+ preprocessProbability(weights);
}
- if( params.means )
- {
- const CvMat* m = params.means;
- if( !CV_IS_MAT(m) ||
- (CV_MAT_TYPE(m->type) != CV_32FC1 &&
- CV_MAT_TYPE(m->type) != CV_64FC1) ||
- m->rows != params.nclusters ||
- m->cols != train_data.dims )
- CV_ERROR( CV_StsBadArg, "The array of mean's must be a valid "
- "floating-point matrix (CvMat) of 'nsamples' x 'dims' size" );
- }
+ // set means
+ if(params.means && (startStep == EM::START_E_STEP || startStep == EM::START_AUTO_STEP))
+ params.means->convertTo(means, CV_32FC1);
- if( params.weights )
+ // set covs
+ if(params.covs && (startStep == EM::START_E_STEP && params.weights))
{
- const CvMat* w = params.weights;
- if( !CV_IS_MAT(w) ||
- (CV_MAT_TYPE(w->type) != CV_32FC1 &&
- CV_MAT_TYPE(w->type) != CV_64FC1) ||
- (w->rows != 1 && w->cols != 1) ||
- w->rows + w->cols - 1 != params.nclusters )
- CV_ERROR( CV_StsBadArg, "The array of weights must be a valid "
- "1d floating-point vector (CvMat) of 'nclusters' elements" );
+ covs.resize(nclusters);
+ for(size_t i = 0; i < params.covs->size(); i++)
+ (*params.covs)[i].convertTo(covs[i], CV_32FC1);
}
-
- if( params.covs )
- for( k = 0; k < params.nclusters; k++ )
- {
- const CvMat* cov = params.covs[k];
- if( !CV_IS_MAT(cov) ||
- (CV_MAT_TYPE(cov->type) != CV_32FC1 &&
- CV_MAT_TYPE(cov->type) != CV_64FC1) ||
- cov->rows != cov->cols || cov->cols != train_data.dims )
- CV_ERROR( CV_StsBadArg,
- "Each of covariation matrices must be a valid square "
- "floating-point matrix (CvMat) of 'dims' x 'dims'" );
- }
-
- __END__;
}
-/****************************************************************************************/
-double CvEM::calcLikelihood( const cv::Mat &input_sample ) const
+void EM::decomposeCovs()
{
- const CvMat _input_sample = input_sample;
- const CvMat* _sample = &_input_sample ;
-
- float* sample_data = 0;
- int cov_mat_type = params.cov_mat_type;
- int i, dims = means->cols;
- int nclusters = params.nclusters;
-
- cvPreparePredictData( _sample, dims, 0, params.nclusters, 0, &sample_data );
+ CV_Assert(!covs.empty());
+ covsEigenValues.resize(nclusters);
+ if(covMatType == EM::COV_MAT_GENERIC)
+ covsRotateMats.resize(nclusters);
+ invCovsEigenValues.resize(nclusters);
+ for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
+ {
+ CV_Assert(!covs[clusterIndex].empty());
- // allocate memory and initializing headers for calculating
- cv::AutoBuffer<double> buffer(nclusters + dims);
- CvMat expo = cvMat(1, nclusters, CV_64F, &buffer[0] );
- CvMat diff = cvMat(1, dims, CV_64F, &buffer[nclusters] );
+ cv::SVD svd(covs[clusterIndex], cv::SVD::MODIFY_A + cv::SVD::FULL_UV);
+ CV_DbgAssert(svd.w.rows == 1 || svd.w.cols == 1);
+ CV_DbgAssert(svd.w.type() == CV_32FC1 && svd.u.type() == CV_32FC1);
- // calculate the probabilities
- for( int k = 0; k < nclusters; k++ )
- {
- const double* mean_k = (const double*)(means->data.ptr + means->step*k);
- const double* w = (const double*)(inv_eigen_values->data.ptr + inv_eigen_values->step*k);
- double cur = log_weight_div_det->data.db[k];
- CvMat* u = cov_rotate_mats[k];
- // cov = u w u' --> cov^(-1) = u w^(-1) u'
- if( cov_mat_type == COV_MAT_SPHERICAL )
+ if(covMatType == EM::COV_MAT_SPHERICAL)
{
- double w0 = w[0];
- for( i = 0; i < dims; i++ )
- {
- double val = sample_data[i] - mean_k[i];
- cur += val*val*w0;
- }
+ float maxSingularVal = svd.w.at<float>(0);
+ covsEigenValues[clusterIndex] = cv::Mat(1, 1, CV_32FC1, cv::Scalar(maxSingularVal));
}
- else
+ else if(covMatType == EM::COV_MAT_DIAGONAL)
{
- for( i = 0; i < dims; i++ )
- diff.data.db[i] = sample_data[i] - mean_k[i];
- if( cov_mat_type == COV_MAT_GENERIC )
- cvGEMM( &diff, u, 1, 0, 0, &diff, CV_GEMM_B_T );
- for( i = 0; i < dims; i++ )
- {
- double val = diff.data.db[i];
- cur += val*val*w[i];
- }
+ covsEigenValues[clusterIndex] = svd.w;
}
- expo.data.db[k] = cur;
+ else //EM::COV_MAT_GENERIC
+ {
+ covsEigenValues[clusterIndex] = svd.w;
+ covsRotateMats[clusterIndex] = svd.u;
+ }
+ cv::max(covsEigenValues[clusterIndex], minEigenValue, covsEigenValues[clusterIndex]);
+ invCovsEigenValues[clusterIndex] = 1./covsEigenValues[clusterIndex];
}
-
- // probability = (2*pi)^(-dims/2)*exp( -0.5 * cur )
- cvConvertScale( &expo, &expo, -0.5 );
- double factor = -double(dims)/2.0 * log(2.0*CV_PI);
- cvAndS( &expo, cvScalar(factor), &expo );
-
- // Calculate the log-likelihood of the given sample -
- // see Alex Smola's blog http://blog.smola.org/page/2 for
- // details on the log-sum-exp trick
- double mini,maxi,retval;
- cvMinMaxLoc( &expo, &mini, &maxi, 0, 0 );
- CvMat *flp = cvCloneMat(&expo);
- cvSubS( &expo, cvScalar(maxi), flp);
- cvExp( flp, flp );
- CvScalar ss = cvSum( flp );
- retval = log(ss.val[0]) + maxi;
- cvReleaseMat(&flp);
-
- if( sample_data != _sample->data.fl )
- cvFree( &sample_data );
-
- return retval;
}
-/****************************************************************************************/
-float
-CvEM::predict( const CvMat* _sample, CvMat* _probs ) const
+void EM::clusterTrainSamples()
{
- float* sample_data = 0;
- int cls = 0;
-
- int cov_mat_type = params.cov_mat_type;
- double opt = FLT_MAX;
-
- int i, dims = means->cols;
- int nclusters = params.nclusters;
-
- cvPreparePredictData( _sample, dims, 0, params.nclusters, _probs, &sample_data );
-
- // allocate memory and initializing headers for calculating
- cv::AutoBuffer<double> buffer(nclusters + dims);
- CvMat expo = cvMat(1, nclusters, CV_64F, &buffer[0] );
- CvMat diff = cvMat(1, dims, CV_64F, &buffer[nclusters] );
-
- // calculate the probabilities
- for( int k = 0; k < nclusters; k++ )
+ int nsamples = trainSamples.rows;
+
+ // Cluster samples, compute/update means
+ cv::Mat labels;
+ cv::kmeans(trainSamples, nclusters, labels,
+ cv::TermCriteria(cv::TermCriteria::COUNT, means.empty() ? 10 : 1, 0.5),
+ 10, cv::KMEANS_PP_CENTERS, means);
+ CV_Assert(means.type() == CV_32FC1);
+
+ // Compute weights and covs
+ weights = cv::Mat(1, nclusters, CV_32FC1, cv::Scalar(0));
+ covs.resize(nclusters);
+ for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
- const double* mean_k = (const double*)(means->data.ptr + means->step*k);
- const double* w = (const double*)(inv_eigen_values->data.ptr + inv_eigen_values->step*k);
- double cur = log_weight_div_det->data.db[k];
- CvMat* u = cov_rotate_mats[k];
- // cov = u w u' --> cov^(-1) = u w^(-1) u'
- if( cov_mat_type == COV_MAT_SPHERICAL )
- {
- double w0 = w[0];
- for( i = 0; i < dims; i++ )
- {
- double val = sample_data[i] - mean_k[i];
- cur += val*val*w0;
- }
- }
- else
+ cv::Mat clusterSamples;
+ for(int sampleIndex = 0; sampleIndex < nsamples; sampleIndex++)
{
- for( i = 0; i < dims; i++ )
- diff.data.db[i] = sample_data[i] - mean_k[i];
- if( cov_mat_type == COV_MAT_GENERIC )
- cvGEMM( &diff, u, 1, 0, 0, &diff, CV_GEMM_B_T );
- for( i = 0; i < dims; i++ )
+ if(labels.at<int>(sampleIndex) == clusterIndex)
{
- double val = diff.data.db[i];
- cur += val*val*w[i];
+ const cv::Mat sample = trainSamples.row(sampleIndex);
+ clusterSamples.push_back(sample);
}
}
+ CV_Assert(!clusterSamples.empty());
- expo.data.db[k] = cur;
- if( cur < opt )
- {
- cls = k;
- opt = cur;
- }
- }
-
- // probability = (2*pi)^(-dims/2)*exp( -0.5 * cur )
- cvConvertScale( &expo, &expo, -0.5 );
- double factor = -double(dims)/2.0 * log(2.0*CV_PI);
- cvAndS( &expo, cvScalar(factor), &expo );
-
- // Calculate the posterior probability of each component
- // given the sample data.
- if( _probs )
- {
- cvExp( &expo, &expo );
- if( _probs->cols == 1 )
- cvReshape( &expo, &expo, 0, nclusters );
- cvConvertScale( &expo, _probs, 1./cvSum( &expo ).val[0] );
+ cv::calcCovarMatrix(clusterSamples, covs[clusterIndex], means.row(clusterIndex),
+ CV_COVAR_NORMAL + CV_COVAR_ROWS + CV_COVAR_USE_AVG + CV_COVAR_SCALE, CV_32FC1);
+ weights.at<float>(clusterIndex) = static_cast<float>(clusterSamples.rows)/static_cast<float>(nsamples);
}
- if( sample_data != _sample->data.fl )
- cvFree( &sample_data );
-
- return (float)cls;
+ decomposeCovs();
}
-
-
-bool CvEM::train( const CvMat* _samples, const CvMat* _sample_idx,
- CvEMParams _params, CvMat* labels )
+void EM::computeLogWeightDivDet()
{
- bool result = false;
- CvVectors train_data;
- CvMat* sample_idx = 0;
-
- train_data.data.fl = 0;
- train_data.count = 0;
+ CV_Assert(!covsEigenValues.empty());
- CV_FUNCNAME("cvEM");
-
- __BEGIN__;
-
- int i, nsamples, nclusters, dims;
-
- clear();
+ cv::Mat logWeights;
+ cv::log(weights, logWeights);
- CV_CALL( cvPrepareTrainData( "cvEM",
- _samples, CV_ROW_SAMPLE, 0, CV_VAR_CATEGORICAL,
- 0, _sample_idx, false, (const float***)&train_data.data.fl,
- &train_data.count, &train_data.dims, &train_data.dims,
- 0, 0, 0, &sample_idx ));
+ logWeightDivDet.create(1, nclusters, CV_32FC1);
+ // note: logWeightDivDet = log(weight_k) - 0.5 * log(|det(cov_k)|)
- CV_CALL( set_params( _params, train_data ));
- nsamples = train_data.count;
- nclusters = params.nclusters;
- dims = train_data.dims;
-
- if( labels && (!CV_IS_MAT(labels) || CV_MAT_TYPE(labels->type) != CV_32SC1 ||
- (labels->cols != 1 && labels->rows != 1) || labels->cols + labels->rows - 1 != nsamples ))
- CV_ERROR( CV_StsBadArg,
- "labels array (when passed) must be a valid 1d integer vector of <sample_count> elements" );
-
- if( nsamples <= nclusters )
- CV_ERROR( CV_StsOutOfRange,
- "The number of samples should be greater than the number of clusters" );
-
- CV_CALL( log_weight_div_det = cvCreateMat( 1, nclusters, CV_64FC1 ));
- CV_CALL( probs = cvCreateMat( nsamples, nclusters, CV_64FC1 ));
- CV_CALL( means = cvCreateMat( nclusters, dims, CV_64FC1 ));
- CV_CALL( weights = cvCreateMat( 1, nclusters, CV_64FC1 ));
- CV_CALL( inv_eigen_values = cvCreateMat( nclusters,
- params.cov_mat_type == COV_MAT_SPHERICAL ? 1 : dims, CV_64FC1 ));
- CV_CALL( covs = (CvMat**)cvAlloc( nclusters * sizeof(*covs) ));
- CV_CALL( cov_rotate_mats = (CvMat**)cvAlloc( nclusters * sizeof(cov_rotate_mats[0]) ));
-
- for( i = 0; i < nclusters; i++ )
+ for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
- CV_CALL( covs[i] = cvCreateMat( dims, dims, CV_64FC1 ));
- CV_CALL( cov_rotate_mats[i] = cvCreateMat( dims, dims, CV_64FC1 ));
- cvZero( cov_rotate_mats[i] );
- }
-
- init_em( train_data );
- log_likelihood = run_em( train_data );
+ float logDetCov = 0.;
+ for(int di = 0; di < covsEigenValues[clusterIndex].cols; di++)
+ logDetCov += std::log(covsEigenValues[clusterIndex].at<float>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0));
- if( log_likelihood <= -DBL_MAX/10000. )
- EXIT;
+ logWeightDivDet.at<float>(clusterIndex) = logWeights.at<float>(clusterIndex) - 0.5 * logDetCov;
+ }
+}
- if( labels )
+bool EM::doTrain(const cv::TermCriteria& termCrit)
+{
+ int dim = trainSamples.cols;
+ // Precompute the empty initial train data in the cases of EM::START_E_STEP and START_AUTO_STEP
+ if(startStep != EM::START_M_STEP)
{
- if( nclusters == 1 )
- cvZero( labels );
- else
+ if(weights.empty())
{
- CvMat sample = cvMat( 1, dims, CV_32F );
- CvMat prob = cvMat( 1, nclusters, CV_64F );
- int lstep = CV_IS_MAT_CONT(labels->type) ? 1 : labels->step/sizeof(int);
-
- for( i = 0; i < nsamples; i++ )
- {
- int idx = sample_idx ? sample_idx->data.i[i] : i;
- sample.data.ptr = _samples->data.ptr + _samples->step*idx;
- prob.data.ptr = probs->data.ptr + probs->step*i;
-
- labels->data.i[i*lstep] = cvRound(predict(&sample, &prob));
- }
+ CV_Assert(covs.empty());
+ clusterTrainSamples();
}
}
- result = true;
-
- __END__;
-
- if( sample_idx != _sample_idx )
- cvReleaseMat( &sample_idx );
-
- cvFree( &train_data.data.ptr );
-
- return result;
-}
+ if(!covs.empty() && covsEigenValues.empty() )
+ {
+ CV_Assert(invCovsEigenValues.empty());
+ decomposeCovs();
+ }
+ if(startStep == EM::START_M_STEP)
+ mStep();
-void CvEM::init_em( const CvVectors& train_data )
-{
- CvMat *w = 0, *u = 0, *tcov = 0;
+ double trainLikelihood, prevTrainLikelihood;
+ for(int iter = 0; ; iter++)
+ {
+ eStep();
+ trainLikelihood = cv::sum(trainLikelihoods)[0];
- CV_FUNCNAME( "CvEM::init_em" );
+ if(iter >= termCrit.maxCount - 1)
+ break;
- __BEGIN__;
+ double trainLikelihoodDelta = trainLikelihood - (iter > 0 ? prevTrainLikelihood : 0);
+ if( iter != 0 &&
+ (trainLikelihoodDelta < -DBL_EPSILON ||
+ trainLikelihoodDelta < termCrit.epsilon * std::fabs(trainLikelihood)))
+ break;
- double maxval = 0;
- int i, force_symm_plus = 0;
- int nclusters = params.nclusters, nsamples = train_data.count, dims = train_data.dims;
+ mStep();
- if( params.start_step == START_AUTO_STEP || nclusters == 1 || nclusters == nsamples )
- init_auto( train_data );
- else if( params.start_step == START_M_STEP )
- {
- for( i = 0; i < nsamples; i++ )
- {
- CvMat prob;
- cvGetRow( params.probs, &prob, i );
- cvMaxS( &prob, 0., &prob );
- cvMinMaxLoc( &prob, 0, &maxval );
- if( maxval < FLT_EPSILON )
- cvSet( &prob, cvScalar(1./nclusters) );
- else
- cvNormalize( &prob, &prob, 1., 0, CV_L1 );
- }
- EXIT; // do not preprocess covariation matrices,
- // as in this case they are initialized at the first iteration of EM
- }
- else
- {
- CV_ASSERT( params.start_step == START_E_STEP && params.means );
- if( params.weights && params.covs )
- {
- cvConvert( params.means, means );
- cvReshape( weights, weights, 1, params.weights->rows );
- cvConvert( params.weights, weights );
- cvReshape( weights, weights, 1, 1 );
- cvMaxS( weights, 0., weights );
- cvMinMaxLoc( weights, 0, &maxval );
- if( maxval < FLT_EPSILON )
- cvSet( weights, cvScalar(1./nclusters) );
- cvNormalize( weights, weights, 1., 0, CV_L1 );
- for( i = 0; i < nclusters; i++ )
- CV_CALL( cvConvert( params.covs[i], covs[i] ));
- force_symm_plus = 1;
- }
- else
- init_auto( train_data );
+ prevTrainLikelihood = trainLikelihood;
}
- CV_CALL( tcov = cvCreateMat( dims, dims, CV_64FC1 ));
- CV_CALL( w = cvCreateMat( dims, dims, CV_64FC1 ));
- if( params.cov_mat_type != COV_MAT_SPHERICAL )
- CV_CALL( u = cvCreateMat( dims, dims, CV_64FC1 ));
+ if( trainLikelihood <= -DBL_MAX/10000. )
+ return false;
- for( i = 0; i < nclusters; i++ )
+ // postprocess covs
+ covs.resize(nclusters);
+ for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
- if( force_symm_plus )
+ if(covMatType == EM::COV_MAT_SPHERICAL)
{
- cvTranspose( covs[i], tcov );
- cvAddWeighted( covs[i], 0.5, tcov, 0.5, 0, tcov );
- }
- else
- cvCopy( covs[i], tcov );
- cvSVD( tcov, w, u, 0, CV_SVD_MODIFY_A + CV_SVD_U_T + CV_SVD_V_T );
- if( params.cov_mat_type == COV_MAT_SPHERICAL )
- cvSetIdentity( covs[i], cvScalar(cvTrace(w).val[0]/dims) );
- /*else if( params.cov_mat_type == COV_MAT_DIAGONAL )
- cvCopy( w, covs[i] );*/
- else
- {
- // generic case: covs[i] = (u')'*max(w,0)*u'
- cvGEMM( u, w, 1, 0, 0, tcov, CV_GEMM_A_T );
- cvGEMM( tcov, u, 1, 0, 0, covs[i], 0 );
+ covs[clusterIndex].create(dim, dim, CV_32FC1);
+ cv::setIdentity(covs[clusterIndex], cv::Scalar(covsEigenValues[clusterIndex].at<float>(0)));
}
+ else if(covMatType == EM::COV_MAT_DIAGONAL)
+ covs[clusterIndex] = cv::Mat::diag(covsEigenValues[clusterIndex].t());
}
- __END__;
-
- cvReleaseMat( &w );
- cvReleaseMat( &u );
- cvReleaseMat( &tcov );
+ return true;
}
-
-void CvEM::init_auto( const CvVectors& train_data )
+void EM::computeProbabilities(const cv::Mat& sample, int& label, cv::Mat* probs, float* likelihood) const
{
- CvMat* hdr = 0;
- const void** vec = 0;
- CvMat* class_ranges = 0;
- CvMat* labels = 0;
+ // L_ik = log(weight_k) - 0.5 * log(|det(cov_k)|) - 0.5 *(x_i - mean_k)' cov_k^(-1) (x_i - mean_k)]
+ // q = arg(max_k(L_ik))
+ // probs_ik = exp(L_ik - L_iq) / (1 + sum_j!=q (exp(L_jk))
- CV_FUNCNAME( "CvEM::init_auto" );
+ CV_DbgAssert(sample.rows == 1);
- __BEGIN__;
+ int dim = sample.cols;
- int nclusters = params.nclusters, nsamples = train_data.count, dims = train_data.dims;
- int i, j;
+ cv::Mat L(1, nclusters, CV_32FC1);
+ cv::Mat expL(1, nclusters, CV_32FC1);
- if( nclusters == nsamples )
+ label = 0;
+ for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
- CvMat src = cvMat( 1, dims, CV_32F );
- CvMat dst = cvMat( 1, dims, CV_64F );
- for( i = 0; i < nsamples; i++ )
- {
- src.data.ptr = train_data.data.ptr[i];
- dst.data.ptr = means->data.ptr + means->step*i;
- cvConvert( &src, &dst );
- cvZero( covs[i] );
- cvSetIdentity( cov_rotate_mats[i] );
- }
- cvSetIdentity( probs );
- cvSet( weights, cvScalar(1./nclusters) );
- }
- else
- {
- int max_count = 0;
+ const cv::Mat centeredSample = sample - means.row(clusterIndex);
- CV_CALL( class_ranges = cvCreateMat( 1, nclusters+1, CV_32SC1 ));
- if( nclusters > 1 )
- {
- CV_CALL( labels = cvCreateMat( 1, nsamples, CV_32SC1 ));
-
- // Not fully executed in case means are already given
- kmeans( train_data, nclusters, labels, cvTermCriteria( CV_TERMCRIT_ITER,
- params.means ? 1 : 10, 0.5 ), params.means );
+ cv::Mat rotatedCenteredSample = covMatType != EM::COV_MAT_GENERIC ?
+ centeredSample : centeredSample * covsRotateMats[clusterIndex];
- CV_CALL( cvSortSamplesByClasses( (const float**)train_data.data.fl,
- labels, class_ranges->data.i ));
- }
- else
+ float Lval = 0;
+ for(int di = 0; di < dim; di++)
{
- class_ranges->data.i[0] = 0;
- class_ranges->data.i[1] = nsamples;
+ float w = invCovsEigenValues[clusterIndex].at<float>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0);
+ float val = rotatedCenteredSample.at<float>(di);
+ Lval += w * val * val;
}
+ CV_DbgAssert(!logWeightDivDet.empty());
+ Lval = logWeightDivDet.at<float>(clusterIndex) - 0.5 * Lval;
+ L.at<float>(clusterIndex) = Lval;
- for( i = 0; i < nclusters; i++ )
- {
- int left = class_ranges->data.i[i], right = class_ranges->data.i[i+1];
- max_count = MAX( max_count, right - left );
- }
- CV_CALL( hdr = (CvMat*)cvAlloc( max_count*sizeof(hdr[0]) ));
- CV_CALL( vec = (const void**)cvAlloc( max_count*sizeof(vec[0]) ));
- hdr[0] = cvMat( 1, dims, CV_32F );
- for( i = 0; i < max_count; i++ )
- {
- vec[i] = hdr + i;
- hdr[i] = hdr[0];
- }
+ if(Lval > L.at<float>(label))
+ label = clusterIndex;
+ }
- for( i = 0; i < nclusters; i++ )
- {
- int left = class_ranges->data.i[i], right = class_ranges->data.i[i+1];
- int cluster_size = right - left;
- CvMat avg;
+ if(!probs && !likelihood)
+ return;
- if( cluster_size <= 0 )
- continue;
+ // TODO maybe without finding max L value
+ cv::exp(L, expL);
+ float partExpSum = 0, // sum_j!=q (exp(L_jk)
+ factor; // 1/(1 + sum_j!=q (exp(L_jk))
+ for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
+ {
+ if(clusterIndex != label)
+ partExpSum += expL.at<float>(clusterIndex);
+ }
+ factor = 1./(1 + partExpSum);
- for( j = left; j < right; j++ )
- hdr[j - left].data.fl = train_data.data.fl[j];
+ cv::exp(L - L.at<float>(label), expL);
- CV_CALL( cvGetRow( means, &avg, i ));
- CV_CALL( cvCalcCovarMatrix( vec, cluster_size, covs[i],
- &avg, CV_COVAR_NORMAL | CV_COVAR_SCALE ));
- weights->data.db[i] = (double)cluster_size/(double)nsamples;
- }
+ if(probs)
+ {
+ probs->create(1, nclusters, CV_32FC1);
+ for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
+ probs->at<float>(clusterIndex) = expL.at<float>(clusterIndex) * factor;
}
- __END__;
-
- cvReleaseMat( &class_ranges );
- cvReleaseMat( &labels );
- cvFree( &hdr );
- cvFree( &vec );
+ if(likelihood)
+ {
+ // note likelihood = log (sum_j exp(L_ij)) - 0.5 * dims * ln2Pi
+ *likelihood = std::log(partExpSum + expL.at<float>(label)) - 0.5 * dim * CV_LOG2PI;
+ }
}
-
-void CvEM::kmeans( const CvVectors& train_data, int nclusters, CvMat* labels,
- CvTermCriteria termcrit, const CvMat* /*centers0*/ )
+void EM::eStep()
{
- int i, nsamples = train_data.count, dims = train_data.dims;
- cv::Ptr<CvMat> temp_mat = cvCreateMat(nsamples, dims, CV_32F);
+ // Compute probs_ik from means_k, covs_k and weights_k.
+ trainProbs.create(trainSamples.rows, nclusters, CV_32FC1);
+ trainLabels.create(trainSamples.rows, 1, CV_32SC1);
+ trainLikelihoods.create(trainSamples.rows, 1, CV_32FC1);
- for( i = 0; i < nsamples; i++ )
- memcpy( temp_mat->data.ptr + temp_mat->step*i, train_data.data.fl[i], dims*sizeof(float));
+ computeLogWeightDivDet();
- cvKMeans2(temp_mat, nclusters, labels, termcrit, 10);
+ for(int sampleIndex = 0; sampleIndex < trainSamples.rows; sampleIndex++)
+ {
+ cv::Mat sampleProbs = trainProbs.row(sampleIndex);
+ computeProbabilities(trainSamples.row(sampleIndex), trainLabels.at<int>(sampleIndex),
+ &sampleProbs, &trainLikelihoods.at<float>(sampleIndex));
+ }
}
-
-/****************************************************************************************/
-/* log_weight_div_det[k] = -2*log(weights_k) + log(det(Sigma_k)))
-
- covs[k] = cov_rotate_mats[k] * cov_eigen_values[k] * (cov_rotate_mats[k])'
- cov_rotate_mats[k] are orthogonal matrices of eigenvectors and
- cov_eigen_values[k] are diagonal matrices (represented by 1D vectors) of eigen values.
-
- The <alpha_ik> is the probability of the vector x_i to belong to the k-th cluster:
- <alpha_ik> ~ weights_k * exp{ -0.5[ln(det(Sigma_k)) + (x_i - mu_k)' Sigma_k^(-1) (x_i - mu_k)] }
- We calculate these probabilities here by the equivalent formulae:
- Denote
- S_ik = -0.5(log(det(Sigma_k)) + (x_i - mu_k)' Sigma_k^(-1) (x_i - mu_k)) + log(weights_k),
- M_i = max_k S_ik = S_qi, so that the q-th class is the one where maximum reaches. Then
- alpha_ik = exp{ S_ik - M_i } / ( 1 + sum_j!=q exp{ S_ji - M_i })
-*/
-double CvEM::run_em( const CvVectors& train_data )
+void EM::mStep()
{
- CvMat* centered_sample = 0;
- CvMat* covs_item = 0;
- CvMat* log_det = 0;
- CvMat* log_weights = 0;
- CvMat* cov_eigen_values = 0;
- CvMat* samples = 0;
- CvMat* sum_probs = 0;
- log_likelihood = -DBL_MAX;
-
- CV_FUNCNAME( "CvEM::run_em" );
- __BEGIN__;
-
- int nsamples = train_data.count, dims = train_data.dims, nclusters = params.nclusters;
- double min_variation = FLT_EPSILON;
- double min_det_value = MAX( DBL_MIN, pow( min_variation, dims ));
- double _log_likelihood = -DBL_MAX;
- int start_step = params.start_step;
- double sum_max_val;
-
- int i, j, k, n;
- int is_general = 0, is_diagonal = 0, is_spherical = 0;
- double prev_log_likelihood = -DBL_MAX / 1000., det, d;
- CvMat whdr, iwhdr, diag, *w, *iw;
- double* w_data;
- double* sp_data;
-
- if( nclusters == 1 )
- {
- double log_weight;
- CV_CALL( cvSet( probs, cvScalar(1.)) );
-
- if( params.cov_mat_type == COV_MAT_SPHERICAL )
- {
- d = cvTrace(*covs).val[0]/dims;
- d = MAX( d, FLT_EPSILON );
- inv_eigen_values->data.db[0] = 1./d;
- log_weight = pow( d, dims*0.5 );
- }
- else
- {
- w_data = inv_eigen_values->data.db;
-
- if( params.cov_mat_type == COV_MAT_GENERIC )
- cvSVD( *covs, inv_eigen_values, *cov_rotate_mats, 0, CV_SVD_U_T );
- else
- cvTranspose( cvGetDiag(*covs, &diag), inv_eigen_values );
+ trainCounts.create(1, nclusters, CV_32SC1);
+ trainCounts = cv::Scalar(0);
- cvMaxS( inv_eigen_values, FLT_EPSILON, inv_eigen_values );
- for( j = 0, det = 1.; j < dims; j++ )
- det *= w_data[j];
- log_weight = sqrt(det);
- cvDiv( 0, inv_eigen_values, inv_eigen_values );
- }
-
- log_weight_div_det->data.db[0] = -2*log(weights->data.db[0]/log_weight);
- log_likelihood = DBL_MAX/1000.;
- EXIT;
- }
+ for(int sampleIndex = 0; sampleIndex < trainLabels.rows; sampleIndex++)
+ trainCounts.at<int>(trainLabels.at<int>(sampleIndex))++;
- if( params.cov_mat_type == COV_MAT_GENERIC )
- is_general = 1;
- else if( params.cov_mat_type == COV_MAT_DIAGONAL )
- is_diagonal = 1;
- else if( params.cov_mat_type == COV_MAT_SPHERICAL )
- is_spherical = 1;
- /* In the case of <cov_mat_type> == COV_MAT_DIAGONAL, the k-th row of cov_eigen_values
- contains the diagonal elements (variations). In the case of
- <cov_mat_type> == COV_MAT_SPHERICAL - the 0-ths elements of the vectors cov_eigen_values[k]
- are to be equal to the mean of the variations over all the dimensions. */
-
- CV_CALL( log_det = cvCreateMat( 1, nclusters, CV_64FC1 ));
- CV_CALL( log_weights = cvCreateMat( 1, nclusters, CV_64FC1 ));
- CV_CALL( covs_item = cvCreateMat( dims, dims, CV_64FC1 ));
- CV_CALL( centered_sample = cvCreateMat( 1, dims, CV_64FC1 ));
- CV_CALL( cov_eigen_values = cvCreateMat( inv_eigen_values->rows, inv_eigen_values->cols, CV_64FC1 ));
- CV_CALL( samples = cvCreateMat( nsamples, dims, CV_64FC1 ));
- CV_CALL( sum_probs = cvCreateMat( 1, nclusters, CV_64FC1 ));
- sp_data = sum_probs->data.db;
-
- // copy the training data into double-precision matrix
- for( i = 0; i < nsamples; i++ )
+ if(cv::countNonZero(trainCounts) != (int)trainCounts.total())
{
- const float* src = train_data.data.fl[i];
- double* dst = (double*)(samples->data.ptr + samples->step*i);
-
- for( j = 0; j < dims; j++ )
- dst[j] = src[j];
+ clusterTrainSamples();
}
-
- if( start_step != START_M_STEP )
+ else
{
- for( k = 0; k < nclusters; k++ )
- {
- if( is_general || is_diagonal )
- {
- w = cvGetRow( cov_eigen_values, &whdr, k );
- if( is_general )
- cvSVD( covs[k], w, cov_rotate_mats[k], 0, CV_SVD_U_T );
- else
- cvTranspose( cvGetDiag( covs[k], &diag ), w );
- w_data = w->data.db;
- for( j = 0, det = 0.; j < dims; j++ )
- det += std::log(w_data[j]);
- if( det < std::log(min_det_value) )
- {
- if( start_step == START_AUTO_STEP )
- det = std::log(min_det_value);
- else
- EXIT;
- }
- log_det->data.db[k] = det;
- }
- else // spherical
- {
- d = cvTrace(covs[k]).val[0]/(double)dims;
- if( d < min_variation )
- {
- if( start_step == START_AUTO_STEP )
- d = min_variation;
- else
- EXIT;
- }
- cov_eigen_values->data.db[k] = d;
- log_det->data.db[k] = d;
- }
- }
+ // Update means_k, covs_k and weights_k from probs_ik
+ int dim = trainSamples.cols;
- if( is_spherical )
- {
- cvLog( log_det, log_det );
- cvScale( log_det, log_det, dims );
- }
- }
+ // Update weights
+ // not normalized first
+ cv::reduce(trainProbs, weights, 0, CV_REDUCE_SUM);
- for( n = 0; n < params.term_crit.max_iter; n++ )
- {
- if( n > 0 || start_step != START_M_STEP )
+ // Update means
+ means.create(nclusters, dim, CV_32FC1);
+ means = cv::Scalar(0);
+ for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
- // e-step: compute probs_ik from means_k, covs_k and weights_k.
- CV_CALL(cvLog( weights, log_weights ));
-
- sum_max_val = 0.;
- // S_ik = -0.5[log(det(Sigma_k)) + (x_i - mu_k)' Sigma_k^(-1) (x_i - mu_k)] + log(weights_k)
- for( k = 0; k < nclusters; k++ )
- {
- CvMat* u = cov_rotate_mats[k];
- const double* mean = (double*)(means->data.ptr + means->step*k);
- w = cvGetRow( cov_eigen_values, &whdr, k );
- iw = cvGetRow( inv_eigen_values, &iwhdr, k );
- cvDiv( 0, w, iw );
-
- w_data = (double*)(inv_eigen_values->data.ptr + inv_eigen_values->step*k);
-
- for( i = 0; i < nsamples; i++ )
- {
- double *csample = centered_sample->data.db, p = log_det->data.db[k];
- const double* sample = (double*)(samples->data.ptr + samples->step*i);
- double* pp = (double*)(probs->data.ptr + probs->step*i);
- for( j = 0; j < dims; j++ )
- csample[j] = sample[j] - mean[j];
- if( is_general )
- cvGEMM( centered_sample, u, 1, 0, 0, centered_sample, CV_GEMM_B_T );
- for( j = 0; j < dims; j++ )
- p += csample[j]*csample[j]*w_data[is_spherical ? 0 : j];
- //pp[k] = -0.5*p + log_weights->data.db[k];
- pp[k] = -0.5*(p+CV_LOG2PI * (double)dims) + log_weights->data.db[k];
-
- // S_ik <- S_ik - max_j S_ij
- if( k == nclusters - 1 )
- {
- double max_val = pp[0];
- for( j = 1; j < nclusters; j++ )
- max_val = MAX( max_val, pp[j] );
- sum_max_val += max_val;
- for( j = 0; j < nclusters; j++ )
- pp[j] -= max_val;
- }
- }
- }
-
- CV_CALL(cvExp( probs, probs )); // exp( S_ik )
- cvZero( sum_probs );
-
- // alpha_ik = exp( S_ik ) / sum_j exp( S_ij ),
- // log_likelihood = sum_i log (sum_j exp(S_ij))
- for( i = 0, _log_likelihood = 0; i < nsamples; i++ )
- {
- double* pp = (double*)(probs->data.ptr + probs->step*i), sum = 0;
- for( j = 0; j < nclusters; j++ )
- sum += pp[j];
- sum = 1./MAX( sum, DBL_EPSILON );
- for( j = 0; j < nclusters; j++ )
- {
- double p = pp[j] *= sum;
- sp_data[j] += p;
- }
- _log_likelihood -= log( sum );
- }
- _log_likelihood+=sum_max_val;
-
- // Check termination criteria. Use the same termination criteria as it is used in MATLAB
- log_likelihood_delta = _log_likelihood - prev_log_likelihood;
-// if( n>0 )
-// fprintf(stderr, "iter=%d, ll=%0.5f (delta=%0.5f, goal=%0.5f)\n",
-// n, _log_likelihood, delta, params.term_crit.epsilon * fabs( _log_likelihood));
- if ( log_likelihood_delta > 0 && log_likelihood_delta < params.term_crit.epsilon * std::fabs( _log_likelihood) )
- break;
- prev_log_likelihood = _log_likelihood;
+ cv::Mat clusterMean = means.row(clusterIndex);
+ for(int sampleIndex = 0; sampleIndex < trainSamples.rows; sampleIndex++)
+ clusterMean += trainProbs.at<float>(sampleIndex, clusterIndex) * trainSamples.row(sampleIndex);
+ clusterMean /= weights.at<float>(clusterIndex);
}
- // m-step: update means_k, covs_k and weights_k from probs_ik
- cvGEMM( probs, samples, 1, 0, 0, means, CV_GEMM_A_T );
-
- for( k = 0; k < nclusters; k++ )
+ // Update covsEigenValues and invCovsEigenValues
+ covs.resize(nclusters);
+ covsEigenValues.resize(nclusters);
+ if(covMatType == EM::COV_MAT_GENERIC)
+ covsRotateMats.resize(nclusters);
+ invCovsEigenValues.resize(nclusters);
+ for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
- double sum = sp_data[k], inv_sum = 1./sum;
- CvMat* cov = covs[k], _mean, _sample;
-
- w = cvGetRow( cov_eigen_values, &whdr, k );
- w_data = w->data.db;
- cvGetRow( means, &_mean, k );
- cvGetRow( samples, &_sample, k );
+ if(covMatType != EM::COV_MAT_SPHERICAL)
+ covsEigenValues[clusterIndex].create(1, dim, CV_32FC1);
+ else
+ covsEigenValues[clusterIndex].create(1, 1, CV_32FC1);
- // update weights_k
- weights->data.db[k] = sum;
+ if(covMatType == EM::COV_MAT_GENERIC)
+ covs[clusterIndex].create(dim, dim, CV_32FC1);
- // update means_k
- cvScale( &_mean, &_mean, inv_sum );
+ cv::Mat clusterCov = covMatType != EM::COV_MAT_GENERIC ?
+ covsEigenValues[clusterIndex] : covs[clusterIndex];
- // compute covs_k
- cvZero( cov );
- cvZero( w );
+ clusterCov = cv::Scalar(0);
- for( i = 0; i < nsamples; i++ )
+ cv::Mat centeredSample;
+ for(int sampleIndex = 0; sampleIndex < trainSamples.rows; sampleIndex++)
{
- double p = probs->data.db[i*nclusters + k]*inv_sum;
- _sample.data.db = (double*)(samples->data.ptr + samples->step*i);
+ centeredSample = trainSamples.row(sampleIndex) - means.row(clusterIndex);
- if( is_general )
- {
- cvMulTransposed( &_sample, covs_item, 1, &_mean );
- cvScaleAdd( covs_item, cvRealScalar(p), cov, cov );
- }
+ if(covMatType == EM::COV_MAT_GENERIC)
+ clusterCov += trainProbs.at<float>(sampleIndex, clusterIndex) * centeredSample.t() * centeredSample;
else
- for( j = 0; j < dims; j++ )
+ {
+ float p = trainProbs.at<float>(sampleIndex, clusterIndex);
+ for(int di = 0; di < dim; di++ )
{
- double val = _sample.data.db[j] - _mean.data.db[j];
- w_data[is_spherical ? 0 : j] += p*val*val;
+ float val = centeredSample.at<float>(di);
+ clusterCov.at<float>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0) += p*val*val;
}
+ }
}
- if( is_spherical )
- {
- d = w_data[0]/(double)dims;
- d = MAX( d, min_variation );
- w->data.db[0] = d;
- log_det->data.db[k] = d;
- }
- else
+ if(covMatType == EM::COV_MAT_SPHERICAL)
+ clusterCov /= dim;
+
+ clusterCov /= weights.at<float>(clusterIndex);
+
+ // Update covsRotateMats for EM::COV_MAT_GENERIC only
+ if(covMatType == EM::COV_MAT_GENERIC)
{
- // Det. of general NxN cov. matrix is the prod. of the eig. vals
- if( is_general )
- cvSVD( cov, w, cov_rotate_mats[k], 0, CV_SVD_U_T );
- cvMaxS( w, min_variation, w );
- for( j = 0, det = 0.; j < dims; j++ )
- det += std::log( w_data[j] );
- log_det->data.db[k] = det;
+ cv::SVD svd(covs[clusterIndex], cv::SVD::MODIFY_A + cv::SVD::FULL_UV);
+ covsEigenValues[clusterIndex] = svd.w;
+ covsRotateMats[clusterIndex] = svd.u;
}
- }
- cvConvertScale( weights, weights, 1./(double)nsamples, 0 );
- cvMaxS( weights, DBL_MIN, weights );
+ cv::max(covsEigenValues[clusterIndex], minEigenValue, covsEigenValues[clusterIndex]);
- if( is_spherical )
- {
- cvLog( log_det, log_det );
- cvScale( log_det, log_det, dims );
+ // update invCovsEigenValues
+ invCovsEigenValues[clusterIndex] = 1./covsEigenValues[clusterIndex];
}
- } // end of iteration process
- //log_weight_div_det[k] = -2*log(weights_k/det(Sigma_k))^0.5) = -2*log(weights_k) + log(det(Sigma_k)))
- if( log_weight_div_det )
- {
- cvScale( log_weights, log_weight_div_det, -2 );
- cvAdd( log_weight_div_det, log_det, log_weight_div_det );
- }
-
- /* Now finalize all the covariation matrices:
- 1) if <cov_mat_type> == COV_MAT_DIAGONAL we used array of <w> as diagonals.
- Now w[k] should be copied back to the diagonals of covs[k];
- 2) if <cov_mat_type> == COV_MAT_SPHERICAL we used the 0-th element of w[k]
- as an average variation in each cluster. The value of the 0-th element of w[k]
- should be copied to the all of the diagonal elements of covs[k]. */
- if( is_spherical )
- {
- for( k = 0; k < nclusters; k++ )
- cvSetIdentity( covs[k], cvScalar(cov_eigen_values->data.db[k]));
+ // Normalize weights
+ weights /= trainSamples.rows;
}
- else if( is_diagonal )
- {
- for( k = 0; k < nclusters; k++ )
- cvTranspose( cvGetRow( cov_eigen_values, &whdr, k ),
- cvGetDiag( covs[k], &diag ));
- }
- cvDiv( 0, cov_eigen_values, inv_eigen_values );
-
- log_likelihood = _log_likelihood;
-
- __END__;
-
- cvReleaseMat( &log_det );
- cvReleaseMat( &log_weights );
- cvReleaseMat( &covs_item );
- cvReleaseMat( ¢ered_sample );
- cvReleaseMat( &cov_eigen_values );
- cvReleaseMat( &samples );
- cvReleaseMat( &sum_probs );
-
- return log_likelihood;
}
-
-int CvEM::get_nclusters() const
+void EM::read(const FileNode& fn)
{
- return params.nclusters;
-}
+ Algorithm::read(fn);
-const CvMat* CvEM::get_means() const
-{
- return means;
-}
-
-const CvMat** CvEM::get_covs() const
-{
- return (const CvMat**)covs;
-}
-
-const CvMat* CvEM::get_weights() const
-{
- return weights;
-}
-
-const CvMat* CvEM::get_probs() const
-{
- return probs;
+ decomposeCovs();
+ computeLogWeightDivDet();
}
-using namespace cv;
-
-CvEM::CvEM( const Mat& samples, const Mat& sample_idx, CvEMParams params )
+static Algorithm* createEM()
{
- means = weights = probs = inv_eigen_values = log_weight_div_det = 0;
- covs = cov_rotate_mats = 0;
-
- // just invoke the train() method
- train(samples, sample_idx, params);
+ return new EM;
}
+static AlgorithmInfo em_info("StatModel.EM", createEM);
-bool CvEM::train( const Mat& _samples, const Mat& _sample_idx,
- CvEMParams _params, Mat* _labels )
+AlgorithmInfo* EM::info() const
{
- CvMat samples = _samples, sidx = _sample_idx, labels, *plabels = 0;
-
- if( _labels )
+ static volatile bool initialized = false;
+ if( !initialized )
{
- int nsamples = sidx.data.ptr ? sidx.rows : samples.rows;
+ EM obj;
+ em_info.addParam(obj, "nclusters", obj.nclusters);
+ em_info.addParam(obj, "covMatType", obj.covMatType);
- if( !(_labels->data && _labels->type() == CV_32SC1 &&
- (_labels->cols == 1 || _labels->rows == 1) &&
- _labels->cols + _labels->rows - 1 == nsamples) )
- _labels->create(nsamples, 1, CV_32SC1);
- plabels = &(labels = *_labels);
- }
- return train(&samples, sidx.data.ptr ? &sidx : 0, _params, plabels);
-}
-
-float
-CvEM::predict( const Mat& _sample, Mat* _probs ) const
-{
- CvMat sample = _sample, probs, *pprobs = 0;
+ em_info.addParam(obj, "weights", obj.weights);
+ em_info.addParam(obj, "means", obj.means);
+ em_info.addParam(obj, "covs", obj.covs);
- if( _probs )
- {
- int nclusters = params.nclusters;
- if(!(_probs->data && (_probs->type() == CV_32F || _probs->type()==CV_64F) &&
- (_probs->cols == 1 || _probs->rows == 1) &&
- _probs->cols + _probs->rows - 1 == nclusters))
- _probs->create(nclusters, 1, _sample.type());
- pprobs = &(probs = *_probs);
+ initialized = true;
}
- return predict(&sample, pprobs);
+ return &em_info;
}
-
-int CvEM::getNClusters() const
-{
- return params.nclusters;
-}
-
-Mat CvEM::getMeans() const
-{
- return Mat(means);
-}
-
-void CvEM::getCovs(vector<Mat>& _covs) const
-{
- int i, n = params.nclusters;
- _covs.resize(n);
- for( i = 0; i < n; i++ )
- Mat(covs[i]).copyTo(_covs[i]);
-}
-
-Mat CvEM::getWeights() const
-{
- return Mat(weights);
-}
-
-Mat CvEM::getProbs() const
-{
- return Mat(probs);
-}
-
+} // namespace cv
/* End of file. */
using namespace std;
using namespace cv;
-void defaultDistribs( vector<Mat>& means, vector<Mat>& covs )
+static
+void defaultDistribs( Mat& means, vector<Mat>& covs )
{
float mp0[] = {0.0f, 0.0f}, cp0[] = {0.67f, 0.0f, 0.0f, 0.67f};
float mp1[] = {5.0f, 0.0f}, cp1[] = {1.0f, 0.0f, 0.0f, 1.0f};
float mp2[] = {1.0f, 5.0f}, cp2[] = {1.0f, 0.0f, 0.0f, 1.0f};
+ means.create(3, 2, CV_32FC1);
Mat m0( 1, 2, CV_32FC1, mp0 ), c0( 2, 2, CV_32FC1, cp0 );
Mat m1( 1, 2, CV_32FC1, mp1 ), c1( 2, 2, CV_32FC1, cp1 );
Mat m2( 1, 2, CV_32FC1, mp2 ), c2( 2, 2, CV_32FC1, cp2 );
means.resize(3), covs.resize(3);
- m0.copyTo(means[0]), c0.copyTo(covs[0]);
- m1.copyTo(means[1]), c1.copyTo(covs[1]);
- m2.copyTo(means[2]), c2.copyTo(covs[2]);
+
+ Mat mr0 = means.row(0);
+ m0.copyTo(mr0);
+ c0.copyTo(covs[0]);
+
+ Mat mr1 = means.row(1);
+ m1.copyTo(mr1);
+ c1.copyTo(covs[1]);
+
+ Mat mr2 = means.row(2);
+ m2.copyTo(mr2);
+ c2.copyTo(covs[2]);
}
// generate points sets by normal distributions
-void generateData( Mat& data, Mat& labels, const vector<int>& sizes, const vector<Mat>& means, const vector<Mat>& covs, int labelType )
+static
+void generateData( Mat& data, Mat& labels, const vector<int>& sizes, const Mat& _means, const vector<Mat>& covs, int labelType )
{
vector<int>::const_iterator sit = sizes.begin();
int total = 0;
for( ; sit != sizes.end(); ++sit )
total += *sit;
- assert( means.size() == sizes.size() && covs.size() == sizes.size() );
+ assert( _means.rows == (int)sizes.size() && covs.size() == sizes.size() );
assert( !data.empty() && data.rows == total );
assert( data.type() == CV_32FC1 );
labels.create( data.rows, 1, labelType );
randn( data, Scalar::all(0.0), Scalar::all(1.0) );
+ vector<Mat> means(sizes.size());
+ for(int i = 0; i < _means.rows; i++)
+ means[i] = _means.row(i);
vector<Mat>::const_iterator mit = means.begin(), cit = covs.begin();
int bi, ei = 0;
sit = sizes.begin();
}
}
+static
int maxIdx( const vector<int>& count )
{
int idx = -1;
return idx;
}
+static
bool getLabelsMap( const Mat& labels, const vector<int>& sizes, vector<int>& labelsMap )
{
- int total = 0, setCount = (int)sizes.size();
- vector<int>::const_iterator sit = sizes.begin();
- for( ; sit != sizes.end(); ++sit )
- total += *sit;
+ size_t total = 0, nclusters = sizes.size();
+ for(size_t i = 0; i < sizes.size(); i++)
+ total += sizes[i];
+
assert( !labels.empty() );
- assert( labels.rows == total && labels.cols == 1 );
+ assert( labels.total() == total && (labels.cols == 1 || labels.rows == 1));
assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 );
bool isFlt = labels.type() == CV_32FC1;
- labelsMap.resize(setCount);
- vector<int>::iterator lmit = labelsMap.begin();
- vector<bool> buzy(setCount, false);
- int bi, ei = 0;
- for( sit = sizes.begin(); sit != sizes.end(); ++sit, ++lmit )
+
+ labelsMap.resize(nclusters);
+
+ vector<bool> buzy(nclusters, false);
+ int startIndex = 0;
+ for( size_t clusterIndex = 0; clusterIndex < sizes.size(); clusterIndex++ )
{
- vector<int> count( setCount, 0 );
- bi = ei;
- ei = bi + *sit;
- if( isFlt )
+ vector<int> count( nclusters, 0 );
+ for( int i = startIndex; i < startIndex + sizes[clusterIndex]; i++)
{
- for( int i = bi; i < ei; i++ )
- count[(int)labels.at<float>(i, 0)]++;
+ int lbl = isFlt ? (int)labels.at<float>(i) : labels.at<int>(i);
+ CV_Assert(lbl < (int)nclusters);
+ count[lbl]++;
+ CV_Assert(count[lbl] < (int)total);
}
- else
- {
- for( int i = bi; i < ei; i++ )
- count[labels.at<int>(i, 0)]++;
- }
-
- *lmit = maxIdx( count );
- if( buzy[*lmit] )
- return false;
- buzy[*lmit] = true;
+ startIndex += sizes[clusterIndex];
+
+ int cls = maxIdx( count );
+ CV_Assert( !buzy[cls] );
+
+ labelsMap[clusterIndex] = cls;
+
+ buzy[cls] = true;
}
- return true;
+ for(size_t i = 0; i < buzy.size(); i++)
+ if(!buzy[i])
+ return false;
+
+ return true;
}
-float calcErr( const Mat& labels, const Mat& origLabels, const vector<int>& sizes, bool labelsEquivalent = true )
+static
+bool calcErr( const Mat& labels, const Mat& origLabels, const vector<int>& sizes, float& err, bool labelsEquivalent = true )
{
- int err = 0;
- assert( !labels.empty() && !origLabels.empty() );
- assert( labels.cols == 1 && origLabels.cols == 1 );
- assert( labels.rows == origLabels.rows );
- assert( labels.type() == origLabels.type() );
- assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 );
+ err = 0;
+ CV_Assert( !labels.empty() && !origLabels.empty() );
+ CV_Assert( labels.rows == 1 || labels.cols == 1 );
+ CV_Assert( origLabels.rows == 1 || origLabels.cols == 1 );
+ CV_Assert( labels.total() == origLabels.total() );
+ CV_Assert( labels.type() == CV_32SC1 || labels.type() == CV_32FC1 );
+ CV_Assert( origLabels.type() == labels.type() );
vector<int> labelsMap;
bool isFlt = labels.type() == CV_32FC1;
if( !labelsEquivalent )
{
- getLabelsMap( labels, sizes, labelsMap );
+ if( !getLabelsMap( labels, sizes, labelsMap ) )
+ return false;
+
for( int i = 0; i < labels.rows; i++ )
if( isFlt )
- err += labels.at<float>(i, 0) != labelsMap[(int)origLabels.at<float>(i, 0)];
+ err += labels.at<float>(i) != labelsMap[(int)origLabels.at<float>(i)] ? 1.f : 0.f;
else
- err += labels.at<int>(i, 0) != labelsMap[origLabels.at<int>(i, 0)];
+ err += labels.at<int>(i) != labelsMap[origLabels.at<int>(i)] ? 1.f : 0.f;
}
else
{
for( int i = 0; i < labels.rows; i++ )
if( isFlt )
- err += labels.at<float>(i, 0) != origLabels.at<float>(i, 0);
+ err += labels.at<float>(i) != origLabels.at<float>(i) ? 1.f : 0.f;
else
- err += labels.at<int>(i, 0) != origLabels.at<int>(i, 0);
+ err += labels.at<int>(i) != origLabels.at<int>(i) ? 1.f : 0.f;
}
- return (float)err / (float)labels.rows;
+ err /= (float)labels.rows;
+ return true;
}
//--------------------------------------------------------------------------------------------
Mat data( pointsCount, 2, CV_32FC1 ), labels;
vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
- vector<Mat> means, covs;
+ Mat means;
+ vector<Mat> covs;
defaultDistribs( means, covs );
generateData( data, labels, sizes, means, covs, CV_32SC1 );
Mat bestLabels;
// 1. flag==KMEANS_PP_CENTERS
kmeans( data, 3, bestLabels, TermCriteria( TermCriteria::COUNT, iters, 0.0), 0, KMEANS_PP_CENTERS, noArray() );
- err = calcErr( bestLabels, labels, sizes, false );
- if( err > 0.01f )
+ if( !calcErr( bestLabels, labels, sizes, err , false ) )
+ {
+ ts->printf( cvtest::TS::LOG, "Bad output labels if flag==KMEANS_PP_CENTERS.\n" );
+ code = cvtest::TS::FAIL_INVALID_OUTPUT;
+ }
+ else if( err > 0.01f )
{
ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) if flag==KMEANS_PP_CENTERS.\n", err );
code = cvtest::TS::FAIL_BAD_ACCURACY;
// 2. flag==KMEANS_RANDOM_CENTERS
kmeans( data, 3, bestLabels, TermCriteria( TermCriteria::COUNT, iters, 0.0), 0, KMEANS_RANDOM_CENTERS, noArray() );
- err = calcErr( bestLabels, labels, sizes, false );
- if( err > 0.01f )
+ if( !calcErr( bestLabels, labels, sizes, err, false ) )
{
- ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) if flag==KMEANS_PP_CENTERS.\n", err );
+ ts->printf( cvtest::TS::LOG, "Bad output labels if flag==KMEANS_RANDOM_CENTERS.\n" );
+ code = cvtest::TS::FAIL_INVALID_OUTPUT;
+ }
+ else if( err > 0.01f )
+ {
+ ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) if flag==KMEANS_RANDOM_CENTERS.\n", err );
code = cvtest::TS::FAIL_BAD_ACCURACY;
}
for( int i = 0; i < 0.5f * pointsCount; i++ )
bestLabels.at<int>( rng.next() % pointsCount, 0 ) = rng.next() % 3;
kmeans( data, 3, bestLabels, TermCriteria( TermCriteria::COUNT, iters, 0.0), 0, KMEANS_USE_INITIAL_LABELS, noArray() );
- err = calcErr( bestLabels, labels, sizes, false );
- if( err > 0.01f )
+ if( !calcErr( bestLabels, labels, sizes, err, false ) )
{
- ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) if flag==KMEANS_PP_CENTERS.\n", err );
+ ts->printf( cvtest::TS::LOG, "Bad output labels if flag==KMEANS_USE_INITIAL_LABELS.\n" );
+ code = cvtest::TS::FAIL_INVALID_OUTPUT;
+ }
+ else if( err > 0.01f )
+ {
+ ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) if flag==KMEANS_USE_INITIAL_LABELS.\n", err );
code = cvtest::TS::FAIL_BAD_ACCURACY;
}
// train data
Mat trainData( pointsCount, 2, CV_32FC1 ), trainLabels;
vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
- vector<Mat> means, covs;
+ Mat means;
+ vector<Mat> covs;
defaultDistribs( means, covs );
generateData( trainData, trainLabels, sizes, means, covs, CV_32FC1 );
KNearest knearest;
knearest.train( trainData, trainLabels );
knearest.find_nearest( testData, 4, &bestLabels );
- float err = calcErr( bestLabels, testLabels, sizes, true );
- if( err > 0.01f )
+ float err;
+ if( !calcErr( bestLabels, testLabels, sizes, err, true ) )
+ {
+ ts->printf( cvtest::TS::LOG, "Bad output labels.\n" );
+ code = cvtest::TS::FAIL_INVALID_OUTPUT;
+ }
+ else if( err > 0.01f )
{
ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) on test data.\n", err );
code = cvtest::TS::FAIL_BAD_ACCURACY;
}
//--------------------------------------------------------------------------------------------
-class CV_EMTest : public cvtest::BaseTest {
+class CV_EMTest : public cvtest::BaseTest
+{
public:
CV_EMTest() {}
protected:
virtual void run( int start_from );
+ int runCase( int caseIndex, const cv::EM::Params& params,
+ const cv::Mat& trainData, const cv::Mat& trainLabels,
+ const cv::Mat& testData, const cv::Mat& testLabels,
+ const vector<int>& sizes);
};
+int CV_EMTest::runCase( int caseIndex, const cv::EM::Params& params,
+ const cv::Mat& trainData, const cv::Mat& trainLabels,
+ const cv::Mat& testData, const cv::Mat& testLabels,
+ const vector<int>& sizes )
+{
+ int code = cvtest::TS::OK;
+
+ cv::Mat labels;
+ float err;
+
+ cv::EM em;
+ em.train( trainData, Mat(), params, &labels );
+
+ // check train error
+ if( !calcErr( labels, trainLabels, sizes, err , false ) )
+ {
+ ts->printf( cvtest::TS::LOG, "Case index %i : Bad output labels.\n", caseIndex );
+ code = cvtest::TS::FAIL_INVALID_OUTPUT;
+ }
+ else if( err > 0.006f )
+ {
+ ts->printf( cvtest::TS::LOG, "Case index %i : Bad accuracy (%f) on train data.\n", caseIndex, err );
+ code = cvtest::TS::FAIL_BAD_ACCURACY;
+ }
+
+ // check test error
+ labels.create( testData.rows, 1, CV_32SC1 );
+ for( int i = 0; i < testData.rows; i++ )
+ {
+ Mat sample = testData.row(i);
+ labels.at<int>(i,0) = (int)em.predict( sample, 0 );
+ }
+ if( !calcErr( labels, testLabels, sizes, err, false ) )
+ {
+ ts->printf( cvtest::TS::LOG, "Case index %i : Bad output labels.\n", caseIndex );
+ code = cvtest::TS::FAIL_INVALID_OUTPUT;
+ }
+ else if( err > 0.006f )
+ {
+ ts->printf( cvtest::TS::LOG, "Case index %i : Bad accuracy (%f) on test data.\n", caseIndex, err );
+ code = cvtest::TS::FAIL_BAD_ACCURACY;
+ }
+
+ return code;
+}
+
void CV_EMTest::run( int /*start_from*/ )
{
- int sizesArr[] = { 5000, 7000, 8000 };
+ int sizesArr[] = { 500, 700, 800 };
int pointsCount = sizesArr[0]+ sizesArr[1] + sizesArr[2];
+ // Points distribution
+ Mat means;
+ vector<Mat> covs;
+ defaultDistribs( means, covs );
+
// train data
Mat trainData( pointsCount, 2, CV_32FC1 ), trainLabels;
vector<int> sizes( sizesArr, sizesArr + sizeof(sizesArr) / sizeof(sizesArr[0]) );
- vector<Mat> means, covs;
- defaultDistribs( means, covs );
generateData( trainData, trainLabels, sizes, means, covs, CV_32SC1 );
// test data
- Mat testData( pointsCount, 2, CV_32FC1 ), testLabels, bestLabels;
+ Mat testData( pointsCount, 2, CV_32FC1 ), testLabels;
generateData( testData, testLabels, sizes, means, covs, CV_32SC1 );
- int code = cvtest::TS::OK;
- float err;
- ExpectationMaximization em;
- CvEMParams params;
+ cv::EM::Params params;
params.nclusters = 3;
- em.train( trainData, Mat(), params, &bestLabels );
+ Mat probs(trainData.rows, params.nclusters, CV_32FC1, cv::Scalar(1));
+ params.probs = &probs;
+ Mat weights(1, params.nclusters, CV_32FC1, cv::Scalar(1));
+ params.weights = &weights;
+ params.means = &means;
+ params.covs = &covs;
- // check train error
- err = calcErr( bestLabels, trainLabels, sizes, false );
- if( err > 0.002f )
+ int code = cvtest::TS::OK;
+ int caseIndex = 0;
{
- ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) on train data.\n", err );
- code = cvtest::TS::FAIL_BAD_ACCURACY;
+ params.startStep = cv::EM::START_AUTO_STEP;
+ params.covMatType = cv::EM::COV_MAT_GENERIC;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
}
-
- // check test error
- bestLabels.create( testData.rows, 1, CV_32SC1 );
- for( int i = 0; i < testData.rows; i++ )
{
- Mat sample( 1, testData.cols, CV_32FC1, testData.ptr<float>(i));
- bestLabels.at<int>(i,0) = (int)em.predict( sample, 0 );
+ params.startStep = cv::EM::START_AUTO_STEP;
+ params.covMatType = cv::EM::COV_MAT_DIAGONAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
}
- err = calcErr( bestLabels, testLabels, sizes, false );
- if( err > 0.005f )
{
- ts->printf( cvtest::TS::LOG, "Bad accuracy (%f) on test data.\n", err );
- code = cvtest::TS::FAIL_BAD_ACCURACY;
+ params.startStep = cv::EM::START_AUTO_STEP;
+ params.covMatType = cv::EM::COV_MAT_SPHERICAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.startStep = cv::EM::START_M_STEP;
+ params.covMatType = cv::EM::COV_MAT_GENERIC;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.startStep = cv::EM::START_M_STEP;
+ params.covMatType = cv::EM::COV_MAT_DIAGONAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.startStep = cv::EM::START_M_STEP;
+ params.covMatType = cv::EM::COV_MAT_SPHERICAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.startStep = cv::EM::START_E_STEP;
+ params.covMatType = cv::EM::COV_MAT_GENERIC;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.startStep = cv::EM::START_E_STEP;
+ params.covMatType = cv::EM::COV_MAT_DIAGONAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
+ }
+ {
+ params.startStep = cv::EM::START_E_STEP;
+ params.covMatType = cv::EM::COV_MAT_SPHERICAL;
+ int currCode = runCase(caseIndex++, params, trainData, trainLabels, testData, testLabels, sizes);
+ code = currCode == cvtest::TS::OK ? code : currCode;
}
ts->set_failed_test_info( code );
}
-class CV_EMTest_Smoke : public cvtest::BaseTest {
+class CV_EMTest_SaveLoad : public cvtest::BaseTest {
public:
- CV_EMTest_Smoke() {}
+ CV_EMTest_SaveLoad() {}
protected:
virtual void run( int /*start_from*/ )
{
int code = cvtest::TS::OK;
- CvEM em;
+ cv::EM em;
- Mat samples = Mat(3,2,CV_32F);
+ Mat samples = Mat(3,1,CV_32F);
samples.at<float>(0,0) = 1;
samples.at<float>(1,0) = 2;
samples.at<float>(2,0) = 3;
- CvEMParams params;
+ cv::EM::Params params;
params.nclusters = 2;
Mat labels;
string filename = tempfile() + ".xml";
{
FileStorage fs = FileStorage(filename, FileStorage::WRITE);
-
try
{
- em.write(fs.fs, "EM");
+ fs << "em" << "{";
+ em.write(fs);
+ fs << "}";
}
catch(...)
{
// Read in
{
FileStorage fs = FileStorage(filename, FileStorage::READ);
- FileNode fileNode = fs["EM"];
-
+ CV_Assert(fs.isOpened());
+ FileNode fn = fs["em"];
try
{
- em.read(const_cast<CvFileStorage*>(fileNode.fs), const_cast<CvFileNode*>(fileNode.node));
+ em.read(fn);
}
catch(...)
{
TEST(ML_KMeans, accuracy) { CV_KMeansTest test; test.safe_run(); }
TEST(ML_KNearest, accuracy) { CV_KNearestTest test; test.safe_run(); }
TEST(ML_EM, accuracy) { CV_EMTest test; test.safe_run(); }
-TEST(ML_EM, smoke) { CV_EMTest_Smoke test; test.safe_run(); }
+TEST(ML_EM, save_load) { CV_EMTest_SaveLoad test; test.safe_run(); }
nbayes = 0;
knearest = 0;
svm = 0;
- em = 0;
ann = 0;
dtree = 0;
boost = 0;
knearest = new CvKNearest;
else if( !modelName.compare(CV_SVM) )
svm = new CvSVM;
- else if( !modelName.compare(CV_EM) )
- em = new CvEM;
else if( !modelName.compare(CV_ANN) )
ann = new CvANN_MLP;
else if( !modelName.compare(CV_DTREE) )
delete knearest;
if( svm )
delete svm;
- if( em )
- delete em;
if( ann )
delete ann;
if( dtree )
knearest->save( filename );
else if( !modelName.compare(CV_SVM) )
svm->save( filename );
- else if( !modelName.compare(CV_EM) )
- em->save( filename );
else if( !modelName.compare(CV_ANN) )
ann->save( filename );
else if( !modelName.compare(CV_DTREE) )
knearest->load( filename );
else if( !modelName.compare(CV_SVM) )
svm->load( filename );
- else if( !modelName.compare(CV_EM) )
- em->load( filename );
else if( !modelName.compare(CV_ANN) )
ann->load( filename );
else if( !modelName.compare(CV_DTREE) )
CvNormalBayesClassifier* nbayes;
CvKNearest* knearest;
CvSVM* svm;
- CvEM* em;
CvANN_MLP* ann;
CvDTree* dtree;
CvBoost* boost;
-#include "opencv2/ml/ml.hpp"
+#include "opencv2/legacy/legacy.hpp"
#include "opencv2/highgui/highgui.hpp"
using namespace cv;
const string winName = "points";
const int testStep = 5;
-
Mat img, imgDst;
RNG rng;
vector<int> trainedPointsMarkers;
vector<Scalar> classColors;
-#define NBC 0 // normal Bayessian classifier
-#define KNN 0 // k nearest neighbors classifier
-#define SVM 0 // support vectors machine
-#define DT 1 // decision tree
-#define BT 0 // ADA Boost
-#define GBT 0 // gradient boosted trees
-#define RF 0 // random forest
-#define ERT 0 // extremely randomized trees
-#define ANN 0 // artificial neural networks
-#define EM 0 // expectation-maximization
+#define _NBC_ 0 // normal Bayessian classifier
+#define _KNN_ 0 // k nearest neighbors classifier
+#define _SVM_ 0 // support vectors machine
+#define _DT_ 1 // decision tree
+#define _BT_ 0 // ADA Boost
+#define _GBT_ 0 // gradient boosted trees
+#define _RF_ 0 // random forest
+#define _ERT_ 0 // extremely randomized trees
+#define _ANN_ 0 // artificial neural networks
+#define _EM_ 0 // expectation-maximization
void on_mouse( int event, int x, int y, int /*flags*/, void* )
{
}
else if( event == CV_EVENT_RBUTTONUP )
{
-#if BT
+#if _BT_
if( classColors.size() < 2 )
{
#endif
classColors.push_back( Scalar((uchar)rng(256), (uchar)rng(256), (uchar)rng(256)) );
updateFlag = true;
-#if BT
+#if _BT_
}
else
cout << "New class can not be added, because CvBoost can only be used for 2-class classification" << endl;
samples.convertTo( samples, CV_32FC1 );
}
-#if NBC
+#if _NBC_
void find_decision_boundary_NBC()
{
img.copyTo( imgDst );
#endif
-#if KNN
+#if _KNN_
void find_decision_boundary_KNN( int K )
{
img.copyTo( imgDst );
}
#endif
-#if SVM
+#if _SVM_
void find_decision_boundary_SVM( CvSVMParams params )
{
img.copyTo( imgDst );
}
#endif
-#if DT
+#if _DT_
void find_decision_boundary_DT()
{
img.copyTo( imgDst );
}
#endif
-#if BT
+#if _BT_
void find_decision_boundary_BT()
{
img.copyTo( imgDst );
#endif
-#if GBT
+#if _GBT_
void find_decision_boundary_GBT()
{
img.copyTo( imgDst );
#endif
-#if RF
+#if _RF_
void find_decision_boundary_RF()
{
img.copyTo( imgDst );
#endif
-#if ERT
+#if _ERT_
void find_decision_boundary_ERT()
{
img.copyTo( imgDst );
}
#endif
-#if ANN
+#if _ANN_
void find_decision_boundary_ANN( const Mat& layer_sizes )
{
img.copyTo( imgDst );
}
#endif
-#if EM
+#if _EM_
void find_decision_boundary_EM()
{
img.copyTo( imgDst );
Mat trainSamples, trainClasses;
prepare_train_data( trainSamples, trainClasses );
- CvEM em;
- CvEMParams params;
- params.covs = NULL;
- params.means = NULL;
- params.weights = NULL;
- params.probs = NULL;
+ cv::EM em;
+ cv::EM::Params params;
params.nclusters = classColors.size();
- params.cov_mat_type = CvEM::COV_MAT_GENERIC;
- params.start_step = CvEM::START_AUTO_STEP;
- params.term_crit.max_iter = 10;
- params.term_crit.epsilon = 0.1;
- params.term_crit.type = CV_TERMCRIT_ITER | CV_TERMCRIT_EPS;
-
+ params.covMatType = cv::EM::COV_MAT_GENERIC;
+ params.startStep = cv::EM::START_AUTO_STEP;
+ params.termCrit = cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::COUNT, 10, 0.1);
// learn classifier
em.train( trainSamples, Mat(), params, &trainClasses );
if( key == 'r' ) // run
{
-#if NBC
+#if _NBC_
find_decision_boundary_NBC();
cvNamedWindow( "NormalBayesClassifier", WINDOW_AUTOSIZE );
imshow( "NormalBayesClassifier", imgDst );
#endif
-#if KNN
+#if _KNN_
int K = 3;
find_decision_boundary_KNN( K );
namedWindow( "kNN", WINDOW_AUTOSIZE );
imshow( "kNN2", imgDst );
#endif
-#if SVM
+#if _SVM_
//(1)-(2)separable and not sets
CvSVMParams params;
params.svm_type = CvSVM::C_SVC;
imshow( "classificationSVM2", imgDst );
#endif
-#if DT
+#if _DT_
find_decision_boundary_DT();
namedWindow( "DT", WINDOW_AUTOSIZE );
imshow( "DT", imgDst );
#endif
-#if BT
+#if _BT_
find_decision_boundary_BT();
namedWindow( "BT", WINDOW_AUTOSIZE );
imshow( "BT", imgDst);
#endif
-#if GBT
+#if _GBT_
find_decision_boundary_GBT();
namedWindow( "GBT", WINDOW_AUTOSIZE );
imshow( "GBT", imgDst);
#endif
-#if RF
+#if _RF_
find_decision_boundary_RF();
namedWindow( "RF", WINDOW_AUTOSIZE );
imshow( "RF", imgDst);
#endif
-#if ERT
+#if _ERT_
find_decision_boundary_ERT();
namedWindow( "ERT", WINDOW_AUTOSIZE );
imshow( "ERT", imgDst);
#endif
-#if ANN
+#if _ANN_
Mat layer_sizes1( 1, 3, CV_32SC1 );
layer_sizes1.at<int>(0) = 2;
layer_sizes1.at<int>(1) = 5;
imshow( "ANN", imgDst );
#endif
-#if EM
+#if _EM_
find_decision_boundary_EM();
namedWindow( "EM", WINDOW_AUTOSIZE );
imshow( "EM", imgDst );