samples/cpp/train_HOG.cpp

   1 #include <opencv2/opencv.hpp>
   2
   3 #include <string>
   4 #include <iostream>
   5 #include <fstream>
   6 #include <vector>
   7
   8 #include <time.h>
   9
  10 using namespace cv;
  11 using namespace cv::ml;
  12 using namespace std;
  13
  14 void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector );
  15 void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData );
  16 void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst );
  17 void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size );
  18 Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size );
  19 void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size );
  20 void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels );
  21 void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color );
  22 void test_it( const Size & size );
  23
  24 void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector )
  25 {
  26     // get the support vectors
  27     Mat sv = svm->getSupportVectors();
  28     const int sv_total = sv.rows;
  29     // get the decision function
  30     Mat alpha, svidx;
  31     double rho = svm->getDecisionFunction(0, alpha, svidx);
  32
  33     CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
  34     CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
  35                (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
  36     CV_Assert( sv.type() == CV_32F );
  37     hog_detector.clear();
  38
  39     hog_detector.resize(sv.cols + 1);
  40     memcpy(&hog_detector[0], sv.ptr(), sv.cols*sizeof(hog_detector[0]));
  41     hog_detector[sv.cols] = (float)-rho;
  42 }
  43
  44
  45 /*
  46 * Convert training/testing set to be used by OpenCV Machine Learning algorithms.
  47 * TrainData is a matrix of size (#samples x max(#cols,#rows) per samples), in 32FC1.
  48 * Transposition of samples are made if needed.
  49 */
  50 void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData )
  51 {
  52     //--Convert data
  53     const int rows = (int)train_samples.size();
  54     const int cols = (int)std::max( train_samples[0].cols, train_samples[0].rows );
  55     cv::Mat tmp(1, cols, CV_32FC1); //< used for transposition if needed
  56     trainData = cv::Mat(rows, cols, CV_32FC1 );
  57     vector< Mat >::const_iterator itr = train_samples.begin();
  58     vector< Mat >::const_iterator end = train_samples.end();
  59     for( int i = 0 ; itr != end ; ++itr, ++i )
  60     {
  61         CV_Assert( itr->cols == 1 ||
  62             itr->rows == 1 );
  63         if( itr->cols == 1 )
  64         {
  65             transpose( *(itr), tmp );
  66             tmp.copyTo( trainData.row( i ) );
  67         }
  68         else if( itr->rows == 1 )
  69         {
  70             itr->copyTo( trainData.row( i ) );
  71         }
  72     }
  73 }
  74
  75 void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst )
  76 {
  77     string line;
  78     ifstream file;
  79
  80     file.open( (prefix+filename).c_str() );
  81     if( !file.is_open() )
  82     {
  83         cerr << "Unable to open the list of images from " << filename << " filename." << endl;
  84         exit( -1 );
  85     }
  86
  87     bool end_of_parsing = false;
  88     while( !end_of_parsing )
  89     {
  90         getline( file, line );
  91         if( line == "" ) // no more file to read
  92         {
  93             end_of_parsing = true;
  94             break;
  95         }
  96         Mat img = imread( (prefix+line).c_str() ); // load the image
  97         if( img.empty() ) // invalid image, just skip it.
  98             continue;
  99 #ifdef _DEBUG
 100         imshow( "image", img );
 101         waitKey( 10 );
 102 #endif
 103         img_lst.push_back( img.clone() );
 104     }
 105 }
 106
 107 void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size )
 108 {
 109     Rect box;
 110     box.width = size.width;
 111     box.height = size.height;
 112
 113     const int size_x = box.width;
 114     const int size_y = box.height;
 115
 116     srand( (unsigned int)time( NULL ) );
 117
 118     vector< Mat >::const_iterator img = full_neg_lst.begin();
 119     vector< Mat >::const_iterator end = full_neg_lst.end();
 120     for( ; img != end ; ++img )
 121     {
 122         box.x = rand() % (img->cols - size_x);
 123         box.y = rand() % (img->rows - size_y);
 124         Mat roi = (*img)(box);
 125         neg_lst.push_back( roi.clone() );
 126 #ifdef _DEBUG
 127         imshow( "img", roi.clone() );
 128         waitKey( 10 );
 129 #endif
 130     }
 131 }
 132
 133 // From http://www.juergenwiki.de/work/wiki/doku.php?id=public:hog_descriptor_computation_and_visualization
 134 Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorValues, const Size & size )
 135 {
 136     const int DIMX = size.width;
 137     const int DIMY = size.height;
 138     float zoomFac = 3;
 139     Mat visu;
 140     resize(color_origImg, visu, Size( (int)(color_origImg.cols*zoomFac), (int)(color_origImg.rows*zoomFac) ) );
 141
 142     int cellSize        = 8;
 143     int gradientBinSize = 9;
 144     float radRangeForOneBin = (float)(CV_PI/(float)gradientBinSize); // dividing 180° into 9 bins, how large (in rad) is one bin?
 145
 146     // prepare data structure: 9 orientation / gradient strenghts for each cell
 147     int cells_in_x_dir = DIMX / cellSize;
 148     int cells_in_y_dir = DIMY / cellSize;
 149     float*** gradientStrengths = new float**[cells_in_y_dir];
 150     int** cellUpdateCounter   = new int*[cells_in_y_dir];
 151     for (int y=0; y<cells_in_y_dir; y++)
 152     {
 153         gradientStrengths[y] = new float*[cells_in_x_dir];
 154         cellUpdateCounter[y] = new int[cells_in_x_dir];
 155         for (int x=0; x<cells_in_x_dir; x++)
 156         {
 157             gradientStrengths[y][x] = new float[gradientBinSize];
 158             cellUpdateCounter[y][x] = 0;
 159
 160             for (int bin=0; bin<gradientBinSize; bin++)
 161                 gradientStrengths[y][x][bin] = 0.0;
 162         }
 163     }
 164
 165     // nr of blocks = nr of cells - 1
 166     // since there is a new block on each cell (overlapping blocks!) but the last one
 167     int blocks_in_x_dir = cells_in_x_dir - 1;
 168     int blocks_in_y_dir = cells_in_y_dir - 1;
 169
 170     // compute gradient strengths per cell
 171     int descriptorDataIdx = 0;
 172     int cellx = 0;
 173     int celly = 0;
 174
 175     for (int blockx=0; blockx<blocks_in_x_dir; blockx++)
 176     {
 177         for (int blocky=0; blocky<blocks_in_y_dir; blocky++)
 178         {
 179             // 4 cells per block ...
 180             for (int cellNr=0; cellNr<4; cellNr++)
 181             {
 182                 // compute corresponding cell nr
 183                 cellx = blockx;
 184                 celly = blocky;
 185                 if (cellNr==1) celly++;
 186                 if (cellNr==2) cellx++;
 187                 if (cellNr==3)
 188                 {
 189                     cellx++;
 190                     celly++;
 191                 }
 192
 193                 for (int bin=0; bin<gradientBinSize; bin++)
 194                 {
 195                     float gradientStrength = descriptorValues[ descriptorDataIdx ];
 196                     descriptorDataIdx++;
 197
 198                     gradientStrengths[celly][cellx][bin] += gradientStrength;
 199
 200                 } // for (all bins)
 201
 202
 203                 // note: overlapping blocks lead to multiple updates of this sum!
 204                 // we therefore keep track how often a cell was updated,
 205                 // to compute average gradient strengths
 206                 cellUpdateCounter[celly][cellx]++;
 207
 208             } // for (all cells)
 209
 210
 211         } // for (all block x pos)
 212     } // for (all block y pos)
 213
 214
 215     // compute average gradient strengths
 216     for (celly=0; celly<cells_in_y_dir; celly++)
 217     {
 218         for (cellx=0; cellx<cells_in_x_dir; cellx++)
 219         {
 220
 221             float NrUpdatesForThisCell = (float)cellUpdateCounter[celly][cellx];
 222
 223             // compute average gradient strenghts for each gradient bin direction
 224             for (int bin=0; bin<gradientBinSize; bin++)
 225             {
 226                 gradientStrengths[celly][cellx][bin] /= NrUpdatesForThisCell;
 227             }
 228         }
 229     }
 230
 231     // draw cells
 232     for (celly=0; celly<cells_in_y_dir; celly++)
 233     {
 234         for (cellx=0; cellx<cells_in_x_dir; cellx++)
 235         {
 236             int drawX = cellx * cellSize;
 237             int drawY = celly * cellSize;
 238
 239             int mx = drawX + cellSize/2;
 240             int my = drawY + cellSize/2;
 241
 242             rectangle(visu, Point((int)(drawX*zoomFac), (int)(drawY*zoomFac)), Point((int)((drawX+cellSize)*zoomFac), (int)((drawY+cellSize)*zoomFac)), Scalar(100,100,100), 1);
 243
 244             // draw in each cell all 9 gradient strengths
 245             for (int bin=0; bin<gradientBinSize; bin++)
 246             {
 247                 float currentGradStrength = gradientStrengths[celly][cellx][bin];
 248
 249                 // no line to draw?
 250                 if (currentGradStrength==0)
 251                     continue;
 252
 253                 float currRad = bin * radRangeForOneBin + radRangeForOneBin/2;
 254
 255                 float dirVecX = cos( currRad );
 256                 float dirVecY = sin( currRad );
 257                 float maxVecLen = (float)(cellSize/2.f);
 258                 float scale = 2.5; // just a visualization scale, to see the lines better
 259
 260                 // compute line coordinates
 261                 float x1 = mx - dirVecX * currentGradStrength * maxVecLen * scale;
 262                 float y1 = my - dirVecY * currentGradStrength * maxVecLen * scale;
 263                 float x2 = mx + dirVecX * currentGradStrength * maxVecLen * scale;
 264                 float y2 = my + dirVecY * currentGradStrength * maxVecLen * scale;
 265
 266                 // draw gradient visualization
 267                 line(visu, Point((int)(x1*zoomFac),(int)(y1*zoomFac)), Point((int)(x2*zoomFac),(int)(y2*zoomFac)), Scalar(0,255,0), 1);
 268
 269             } // for (all bins)
 270
 271         } // for (cellx)
 272     } // for (celly)
 273
 274
 275     // don't forget to free memory allocated by helper data structures!
 276     for (int y=0; y<cells_in_y_dir; y++)
 277     {
 278         for (int x=0; x<cells_in_x_dir; x++)
 279         {
 280             delete[] gradientStrengths[y][x];
 281         }
 282         delete[] gradientStrengths[y];
 283         delete[] cellUpdateCounter[y];
 284     }
 285     delete[] gradientStrengths;
 286     delete[] cellUpdateCounter;
 287
 288     return visu;
 289
 290 } // get_hogdescriptor_visu
 291
 292 void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, const Size & size )
 293 {
 294     HOGDescriptor hog;
 295     hog.winSize = size;
 296     Mat gray;
 297     vector< Point > location;
 298     vector< float > descriptors;
 299
 300     vector< Mat >::const_iterator img = img_lst.begin();
 301     vector< Mat >::const_iterator end = img_lst.end();
 302     for( ; img != end ; ++img )
 303     {
 304         cvtColor( *img, gray, COLOR_BGR2GRAY );
 305         hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ), location );
 306         gradient_lst.push_back( Mat( descriptors ).clone() );
 307 #ifdef _DEBUG
 308         imshow( "gradient", get_hogdescriptor_visu( img->clone(), descriptors, size ) );
 309         waitKey( 10 );
 310 #endif
 311     }
 312 }
 313
 314 void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels )
 315 {
 316     /* Default values to train SVM */
 317     SVM::Params params;
 318     params.coef0 = 0.0;
 319     params.degree = 3;
 320     params.termCrit.epsilon = 1e-3;
 321     params.gamma = 0;
 322     params.kernelType = SVM::LINEAR;
 323     params.nu = 0.5;
 324     params.p = 0.1; // for EPSILON_SVR, epsilon in loss function?
 325     params.C = 0.01; // From paper, soft classifier
 326     params.svmType = SVM::EPS_SVR; // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
 327
 328     Mat train_data;
 329     convert_to_ml( gradient_lst, train_data );
 330
 331     clog << "Start training...";
 332     Ptr<SVM> svm = StatModel::train<SVM>(train_data, ROW_SAMPLE, Mat(labels), params);
 333     clog << "...[done]" << endl;
 334
 335     svm->save( "my_people_detector.yml" );
 336 }
 337
 338 void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color )
 339 {
 340     if( !locations.empty() )
 341     {
 342         vector< Rect >::const_iterator loc = locations.begin();
 343         vector< Rect >::const_iterator end = locations.end();
 344         for( ; loc != end ; ++loc )
 345         {
 346             rectangle( img, *loc, color, 2 );
 347         }
 348     }
 349 }
 350
 351 void test_it( const Size & size )
 352 {
 353     char key = 27;
 354     Scalar reference( 0, 255, 0 );
 355     Scalar trained( 0, 0, 255 );
 356     Mat img, draw;
 357     Ptr<SVM> svm;
 358     HOGDescriptor hog;
 359     HOGDescriptor my_hog;
 360     my_hog.winSize = size;
 361     VideoCapture video;
 362     vector< Rect > locations;
 363
 364     // Load the trained SVM.
 365     svm = StatModel::load<SVM>( "my_people_detector.yml" );
 366     // Set the trained svm to my_hog
 367     vector< float > hog_detector;
 368     get_svm_detector( svm, hog_detector );
 369     my_hog.setSVMDetector( hog_detector );
 370     // Set the people detector.
 371     hog.setSVMDetector( hog.getDefaultPeopleDetector() );
 372     // Open the camera.
 373     video.open(0);
 374     if( !video.isOpened() )
 375     {
 376         cerr << "Unable to open the device 0" << endl;
 377         exit( -1 );
 378     }
 379
 380     bool end_of_process = false;
 381     while( !end_of_process )
 382     {
 383         video >> img;
 384         if( img.empty() )
 385             break;
 386
 387         draw = img.clone();
 388
 389         locations.clear();
 390         hog.detectMultiScale( img, locations );
 391         draw_locations( draw, locations, reference );
 392
 393         locations.clear();
 394         my_hog.detectMultiScale( img, locations );
 395         draw_locations( draw, locations, trained );
 396
 397         imshow( "Video", draw );
 398         key = (char)waitKey( 10 );
 399         if( 27 == key )
 400             end_of_process = true;
 401     }
 402 }
 403
 404 int main( int argc, char** argv )
 405 {
 406     if( argc != 4 )
 407     {
 408         cout << "Wrong number of parameters." << endl
 409             << "Usage: " << argv[0] << " pos_dir pos.lst neg_dir neg.lst" << endl
 410             << "example: " << argv[0] << " /INRIA_dataset/ Train/pos.lst /INRIA_dataset/ Train/neg.lst" << endl;
 411         exit( -1 );
 412     }
 413     vector< Mat > pos_lst;
 414     vector< Mat > full_neg_lst;
 415     vector< Mat > neg_lst;
 416     vector< Mat > gradient_lst;
 417     vector< int > labels;
 418
 419     load_images( argv[1], argv[2], pos_lst );
 420     labels.assign( pos_lst.size(), +1 );
 421     const unsigned int old = (unsigned int)labels.size();
 422     load_images( argv[3], argv[4], full_neg_lst );
 423     sample_neg( full_neg_lst, neg_lst, Size( 96,160 ) );
 424     labels.insert( labels.end(), neg_lst.size(), -1 );
 425     CV_Assert( old < labels.size() );
 426
 427     compute_hog( pos_lst, gradient_lst, Size( 96, 160 ) );
 428     compute_hog( neg_lst, gradient_lst, Size( 96, 160 ) );
 429
 430     train_svm( gradient_lst, labels );
 431
 432     test_it( Size( 96, 160 ) ); // change with your parameters
 433
 434     return 0;
 435 }