From 93155c6ae049bc3d8f5b99b0f1606ba88ef53ed8 Mon Sep 17 00:00:00 2001 From: Kevin Date: Wed, 22 Aug 2012 23:21:49 -0400 Subject: [PATCH] Overloaded PCA constructor and ( ) operator to implement Feature#2287 - PCA that retains a specified amount of variance from the data. A sample was added to samples/cpp to demonstrate the new functionality. Docs and Tests were also updated --- modules/core/include/opencv2/core/core.hpp | 5 + modules/core/src/matmul.cpp | 117 ++++++++++++++++++ modules/core/test/test_mat.cpp | 33 +++++- samples/cpp/pca.cpp | 184 +++++++++++++++++++++++++++++ 4 files changed, 335 insertions(+), 4 deletions(-) create mode 100644 samples/cpp/pca.cpp diff --git a/modules/core/include/opencv2/core/core.hpp b/modules/core/include/opencv2/core/core.hpp index 5fd9272..f03f21b 100644 --- a/modules/core/include/opencv2/core/core.hpp +++ b/modules/core/include/opencv2/core/core.hpp @@ -2359,8 +2359,10 @@ public: PCA(); //! the constructor that performs PCA PCA(InputArray data, InputArray mean, int flags, int maxComponents=0); + PCA(InputArray data, InputArray mean, int flags, double retainedVariance); //! operator that performs PCA. The previously stored data, if any, is released PCA& operator()(InputArray data, InputArray mean, int flags, int maxComponents=0); + PCA& operator()(InputArray data, InputArray mean, int flags, double retainedVariance); //! projects vector from the original space to the principal components subspace Mat project(InputArray vec) const; //! projects vector from the original space to the principal components subspace @@ -2378,6 +2380,9 @@ public: CV_EXPORTS_W void PCACompute(InputArray data, CV_OUT InputOutputArray mean, OutputArray eigenvectors, int maxComponents=0); +CV_EXPORTS_W void PCACompute(InputArray data, CV_OUT InputOutputArray mean, + OutputArray eigenvectors, double retainedVariance); + CV_EXPORTS_W void PCAProject(InputArray data, InputArray mean, InputArray eigenvectors, OutputArray result); diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index fe138f0..b71aead 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -2811,6 +2811,11 @@ PCA::PCA(InputArray data, InputArray _mean, int flags, int maxComponents) operator()(data, _mean, flags, maxComponents); } +PCA::PCA(InputArray data, InputArray _mean, int flags, double retainedVariance) +{ + operator()(data, _mean, flags, retainedVariance); +} + PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComponents) { Mat data = _data.getMat(), _mean = __mean.getMat(); @@ -2895,6 +2900,109 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp return *this; } +PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, double retainedVariance) +{ + Mat data = _data.getMat(), _mean = __mean.getMat(); + int covar_flags = CV_COVAR_SCALE; + int i, len, in_count; + Size mean_sz; + + CV_Assert( data.channels() == 1 ); + if( flags & CV_PCA_DATA_AS_COL ) + { + len = data.rows; + in_count = data.cols; + covar_flags |= CV_COVAR_COLS; + mean_sz = Size(1, len); + } + else + { + len = data.cols; + in_count = data.rows; + covar_flags |= CV_COVAR_ROWS; + mean_sz = Size(len, 1); + } + + CV_Assert( retainedVariance > 0 && retainedVariance <= 1 ); + + int count = std::min(len, in_count); + + // "scrambled" way to compute PCA (when cols(A)>rows(A)): + // B = A'A; B*x=b*x; C = AA'; C*y=c*y -> AA'*y=c*y -> A'A*(A'*y)=c*(A'*y) -> c = b, x=A'*y + if( len <= in_count ) + covar_flags |= CV_COVAR_NORMAL; + + int ctype = std::max(CV_32F, data.depth()); + mean.create( mean_sz, ctype ); + + Mat covar( count, count, ctype ); + + if( _mean.data ) + { + CV_Assert( _mean.size() == mean_sz ); + _mean.convertTo(mean, ctype); + } + + calcCovarMatrix( data, covar, mean, covar_flags, ctype ); + eigen( covar, eigenvalues, eigenvectors ); + + if( !(covar_flags & CV_COVAR_NORMAL) ) + { + // CV_PCA_DATA_AS_ROW: cols(A)>rows(A). x=A'*y -> x'=y'*A + // CV_PCA_DATA_AS_COL: rows(A)>cols(A). x=A''*y -> x'=y'*A' + Mat tmp_data, tmp_mean = repeat(mean, data.rows/mean.rows, data.cols/mean.cols); + if( data.type() != ctype || tmp_mean.data == mean.data ) + { + data.convertTo( tmp_data, ctype ); + subtract( tmp_data, tmp_mean, tmp_data ); + } + else + { + subtract( data, tmp_mean, tmp_mean ); + tmp_data = tmp_mean; + } + + Mat evects1(count, len, ctype); + gemm( eigenvectors, tmp_data, 1, Mat(), 0, evects1, + (flags & CV_PCA_DATA_AS_COL) ? CV_GEMM_B_T : 0); + eigenvectors = evects1; + + // normalize all eigenvectors + for( i = 0; i < eigenvectors.rows; i++ ) + { + Mat vec = eigenvectors.row(i); + normalize(vec, vec); + } + } + + // compute the cumulative energy content for each eigenvector + Mat g(eigenvalues.size(), ctype); + + for(int ig = 0; ig < g.rows; ig++) + { + g.at(ig,0) = 0; + for(int im = 0; im <= ig; im++) + { + g.at(ig,0) += eigenvalues.at(im,0); + } + } + + int L; + for(L = 0; L < eigenvalues.rows; L++) + { + double energy = g.at(L, 0) / g.at(g.rows - 1, 0); + if(energy > retainedVariance) + break; + } + + L = std::max(2, L); + + // use clone() to physically copy the data and thus deallocate the original matrices + eigenvalues = eigenvalues.rowRange(0,L).clone(); + eigenvectors = eigenvectors.rowRange(0,L).clone(); + + return *this; +} void PCA::project(InputArray _data, OutputArray result) const { @@ -2965,6 +3073,15 @@ void cv::PCACompute(InputArray data, InputOutputArray mean, pca.eigenvectors.copyTo(eigenvectors); } +void cv::PCACompute(InputArray data, InputOutputArray mean, + OutputArray eigenvectors, double retainedVariance) +{ + PCA pca; + pca(data, mean, 0, retainedVariance); + pca.mean.copyTo(mean); + pca.eigenvectors.copyTo(eigenvectors); +} + void cv::PCAProject(InputArray data, InputArray mean, InputArray eigenvectors, OutputArray result) { diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp index 6b5d09f..44ab9a9 100644 --- a/modules/core/test/test_mat.cpp +++ b/modules/core/test/test_mat.cpp @@ -297,6 +297,7 @@ protected: prjEps, backPrjEps, evalEps, evecEps; int maxComponents = 100; + double retainedVariance = 0.95; Mat rPoints(sz, CV_32FC1), rTestPoints(sz, CV_32FC1); RNG& rng = ts->get_rng(); @@ -423,9 +424,33 @@ protected: ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY ); return; } - + + // 3. check C++ PCA w/retainedVariance + cPCA( rPoints.t(), Mat(), CV_PCA_DATA_AS_COL, retainedVariance ); + diffPrjEps = 1, diffBackPrjEps = 1; + Mat rvPrjTestPoints = cPCA.project(rTestPoints.t()); + + if( cPCA.eigenvectors.rows > maxComponents) + err = norm(cv::abs(rvPrjTestPoints.rowRange(0,maxComponents)), cv::abs(rPrjTestPoints.t()), CV_RELATIVE_L2 ); + else + err = norm(cv::abs(rvPrjTestPoints), cv::abs(rPrjTestPoints.colRange(0,cPCA.eigenvectors.rows).t()), CV_RELATIVE_L2 ); + + if( err > diffPrjEps ) + { + ts->printf( cvtest::TS::LOG, "bad accuracy of project() (CV_PCA_DATA_AS_COL); retainedVariance=0.95; err = %f\n", err ); + ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY ); + return; + } + err = norm(cPCA.backProject(rvPrjTestPoints), rBackPrjTestPoints.t(), CV_RELATIVE_L2 ); + if( err > diffBackPrjEps ) + { + ts->printf( cvtest::TS::LOG, "bad accuracy of backProject() (CV_PCA_DATA_AS_COL); retainedVariance=0.95; err = %f\n", err ); + ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY ); + return; + } + #ifdef CHECK_C - // 3. check C PCA & ROW + // 4. check C PCA & ROW _points = rPoints; _testPoints = rTestPoints; _avg = avg; @@ -455,7 +480,7 @@ protected: return; } - // 3. check C PCA & COL + // 5. check C PCA & COL _points = cPoints; _testPoints = cTestPoints; avg = avg.t(); _avg = avg; @@ -871,4 +896,4 @@ TEST(Core_Mat, reshape_1942) cn = M.channels(); ); ASSERT_EQ(1, cn); -} \ No newline at end of file +} diff --git a/samples/cpp/pca.cpp b/samples/cpp/pca.cpp new file mode 100644 index 0000000..b489a5e --- /dev/null +++ b/samples/cpp/pca.cpp @@ -0,0 +1,184 @@ +/* +* pca.cpp +* +* Author: +* Kevin Hughes +* +* Special Thanks to: +* Philipp Wagner +* +* This program demonstrates how to use OpenCV PCA with a +* specified amount of variance to retain. The effect +* is illustrated further by using a trackbar to +* change the value for retained varaince. +* +* The program takes as input a text file with each line +* begin the full path to an image. PCA will be performed +* on this list of images. The author recommends using +* the first 15 faces of the AT&T face data set: +* http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html +* +* so for example your input text file would look like this: +* +* /orl_faces/s1/1.pgm +* /orl_faces/s2/1.pgm +* /orl_faces/s3/1.pgm +* /orl_faces/s4/1.pgm +* /orl_faces/s5/1.pgm +* /orl_faces/s6/1.pgm +* /orl_faces/s7/1.pgm +* /orl_faces/s8/1.pgm +* /orl_faces/s9/1.pgm +* /orl_faces/s10/1.pgm +* /orl_faces/s11/1.pgm +* /orl_faces/s12/1.pgm +* /orl_faces/s13/1.pgm +* /orl_faces/s14/1.pgm +* /orl_faces/s15/1.pgm +* +*/ + +#include +#include +#include + +#include +#include + +using namespace cv; +using namespace std; + +/////////////////////// +// Functions +void read_imgList(const string& filename, vector& images) { + std::ifstream file(filename.c_str(), ifstream::in); + if (!file) { + string error_message = "No valid input file was given, please check the given filename."; + CV_Error(CV_StsBadArg, error_message); + } + string line; + while (getline(file, line)) { + images.push_back(imread(line, 0)); + } +} + +Mat formatImagesForPCA(const vector &data) +{ + Mat dst(data.size(), data[0].rows*data[0].cols, CV_32F); + for(unsigned int i = 0; i < data.size(); i++) + { + Mat image_row = data[i].clone().reshape(1,1); + Mat row_i = dst.row(i); + image_row.convertTo(row_i,CV_32F); + } + return dst; +} + +Mat toGrayscale(InputArray _src) { + Mat src = _src.getMat(); + // only allow one channel + if(src.channels() != 1) { + CV_Error(CV_StsBadArg, "Only Matrices with one channel are supported"); + } + // create and return normalized image + Mat dst; + cv::normalize(_src, dst, 0, 255, NORM_MINMAX, CV_8UC1); + return dst; +} + +struct params +{ + Mat data; + int ch; + int rows; + PCA pca; + string winName; +}; + +void onTrackbar(int pos, void* ptr) +{ + cout << "Retained Variance = " << pos << "% "; + cout << "re-calculating PCA..." << std::flush; + + double var = pos / 100.0; + + struct params *p = (struct params *)ptr; + + p->pca = PCA(p->data, cv::Mat(), CV_PCA_DATA_AS_ROW, var); + + Mat point = p->pca.project(p->data.row(0)); + Mat reconstruction = p->pca.backProject(point); + reconstruction = reconstruction.reshape(p->ch, p->rows); + reconstruction = toGrayscale(reconstruction); + + imshow(p->winName, reconstruction); + cout << "done! # of principal components: " << p->pca.eigenvectors.rows << endl; +} + + +/////////////////////// +// Main +int main(int argc, char** argv) +{ + if (argc != 2) { + cout << "usage: " << argv[0] << " " << endl; + exit(1); + } + + // Get the path to your CSV. + string imgList = string(argv[1]); + + // vector to hold the images + vector images; + + // Read in the data. This can fail if not valid + try { + read_imgList(imgList, images); + } catch (cv::Exception& e) { + cerr << "Error opening file \"" << imgList << "\". Reason: " << e.msg << endl; + exit(1); + } + + // Quit if there are not enough images for this demo. + if(images.size() <= 1) { + string error_message = "This demo needs at least 2 images to work. Please add more images to your data set!"; + CV_Error(CV_StsError, error_message); + } + + // Reshape and stack images into a rowMatrix + Mat data = formatImagesForPCA(images); + + // perform PCA + PCA pca(data, cv::Mat(), CV_PCA_DATA_AS_ROW, 0.95); // trackbar is initially set here, also this is a common value for retainedVariance + + // Demonstration of the effect of retainedVariance on the first image + Mat point = pca.project(data.row(0)); // project into the eigenspace, thus the image becomes a "point" + Mat reconstruction = pca.backProject(point); // re-create the image from the "point" + reconstruction = reconstruction.reshape(images[0].channels(), images[0].rows); // reshape from a row vector into image shape + reconstruction = toGrayscale(reconstruction); // re-scale for displaying purposes + + // init highgui window + string winName = "Reconstruction | press 'q' to quit"; + namedWindow(winName, CV_WINDOW_NORMAL); + + // params struct to pass to the trackbar handler + params p; + p.data = data; + p.ch = images[0].channels(); + p.rows = images[0].rows; + p.pca = pca; + p.winName = winName; + + // create the tracbar + int pos = 95; + createTrackbar("Retained Variance (%)", winName, &pos, 100, onTrackbar, (void*)&p); + + // display until user presses q + imshow(winName, reconstruction); + + char key = 0; + while(key != 'q') + key = waitKey(); + + return 0; +} -- 2.7.4