modules/ml/src/lr.cpp

   1 ///////////////////////////////////////////////////////////////////////////////////////
   2 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   3
   4 //  By downloading, copying, installing or using the software you agree to this license.
   5 //  If you do not agree to this license, do not download, install,
   6 //  copy or use the software.
   7
   8 // This is a implementation of the Logistic Regression algorithm in C++ in OpenCV.
   9
  10 // AUTHOR:
  11 // Rahul Kavi rahulkavi[at]live[at]com
  12
  13 // # You are free to use, change, or redistribute the code in any way you wish for
  14 // # non-commercial purposes, but please maintain the name of the original author.
  15 // # This code comes with no warranty of any kind.
  16
  17 // #
  18 // # You are free to use, change, or redistribute the code in any way you wish for
  19 // # non-commercial purposes, but please maintain the name of the original author.
  20 // # This code comes with no warranty of any kind.
  21
  22 // # Logistic Regression ALGORITHM
  23
  24
  25 //                           License Agreement
  26 //                For Open Source Computer Vision Library
  27
  28 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  29 // Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
  30 // Third party copyrights are property of their respective owners.
  31
  32 // Redistribution and use in source and binary forms, with or without modification,
  33 // are permitted provided that the following conditions are met:
  34
  35 //   * Redistributions of source code must retain the above copyright notice,
  36 //     this list of conditions and the following disclaimer.
  37
  38 //   * Redistributions in binary form must reproduce the above copyright notice,
  39 //     this list of conditions and the following disclaimer in the documentation
  40 //     and/or other materials provided with the distribution.
  41
  42 //   * The name of the copyright holders may not be used to endorse or promote products
  43 //     derived from this software without specific prior written permission.
  44
  45 // This software is provided by the copyright holders and contributors "as is" and
  46 // any express or implied warranties, including, but not limited to, the implied
  47 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  48 // In no event shall the Intel Corporation or contributors be liable for any direct,
  49 // indirect, incidental, special, exemplary, or consequential damages
  50 // (including, but not limited to, procurement of substitute goods or services;
  51 // loss of use, data, or profits; or business interruption) however caused
  52 // and on any theory of liability, whether in contract, strict liability,
  53 // or tort (including negligence or otherwise) arising in any way out of
  54 // the use of this software, even if advised of the possibility of such damage.
  55
  56 #include "precomp.hpp"
  57
  58 using namespace std;
  59
  60 namespace cv {
  61 namespace ml {
  62
  63 LogisticRegression::Params::Params(double learning_rate,
  64                                    int iters,
  65                                    int method,
  66                                    int normlization,
  67                                    int reg,
  68                                    int batch_size)
  69 {
  70     alpha = learning_rate;
  71     num_iters = iters;
  72     norm = normlization;
  73     regularized = reg;
  74     train_method = method;
  75     mini_batch_size = batch_size;
  76     term_crit = cv::TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, num_iters, alpha);
  77 }
  78
  79 class LogisticRegressionImpl : public LogisticRegression
  80 {
  81 public:
  82     LogisticRegressionImpl(const Params& pms)
  83         : params(pms)
  84     {
  85     }
  86     virtual ~LogisticRegressionImpl() {}
  87
  88     virtual bool train( const Ptr<TrainData>& trainData, int=0 );
  89     virtual float predict(InputArray samples, OutputArray results, int) const;
  90     virtual void clear();
  91     virtual void write(FileStorage& fs) const;
  92     virtual void read(const FileNode& fn);
  93     virtual cv::Mat get_learnt_thetas() const;
  94     virtual int getVarCount() const { return learnt_thetas.cols; }
  95     virtual bool isTrained() const { return !learnt_thetas.empty(); }
  96     virtual bool isClassifier() const { return true; }
  97     virtual String getDefaultModelName() const { return "opencv_ml_lr"; }
  98 protected:
  99     cv::Mat calc_sigmoid(const cv::Mat& data) const;
 100     double compute_cost(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta);
 101     cv::Mat compute_batch_gradient(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta);
 102     cv::Mat compute_mini_batch_gradient(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta);
 103     bool set_label_map(const cv::Mat& _labels_i);
 104     cv::Mat remap_labels(const cv::Mat& _labels_i, const map<int, int>& lmap) const;
 105 protected:
 106     Params params;
 107     cv::Mat learnt_thetas;
 108     map<int, int> forward_mapper;
 109     map<int, int> reverse_mapper;
 110     cv::Mat labels_o;
 111     cv::Mat labels_n;
 112 };
 113
 114 Ptr<LogisticRegression> LogisticRegression::create(const Params& params)
 115 {
 116     return makePtr<LogisticRegressionImpl>(params);
 117 }
 118
 119 bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
 120 {
 121     clear();
 122     cv::Mat _data_i = trainData->getSamples();
 123     cv::Mat _labels_i = trainData->getResponses();
 124
 125     CV_Assert( !_labels_i.empty() && !_data_i.empty());
 126
 127     // check the number of columns
 128     if(_labels_i.cols != 1)
 129     {
 130         CV_Error( CV_StsBadArg, "_labels_i should be a column matrix" );
 131     }
 132
 133     // check data type.
 134     // data should be of floating type CV_32FC1
 135
 136     if((_data_i.type() != CV_32FC1) || (_labels_i.type() != CV_32FC1))
 137     {
 138         CV_Error( CV_StsBadArg, "data and labels must be a floating point matrix" );
 139     }
 140
 141     bool ok = false;
 142
 143     cv::Mat labels;
 144
 145     set_label_map(_labels_i);
 146     int num_classes = (int) this->forward_mapper.size();
 147
 148     // add a column of ones
 149     cv::Mat data_t = cv::Mat::zeros(_data_i.rows, _data_i.cols+1, CV_32F);
 150     vconcat(cv::Mat(_data_i.rows, 1, _data_i.type(), Scalar::all(1.0)), data_t.col(0));
 151
 152     for (int i=1;i<data_t.cols;i++)
 153     {
 154         vconcat(_data_i.col(i-1), data_t.col(i));
 155     }
 156
 157     if(num_classes < 2)
 158     {
 159         CV_Error( CV_StsBadArg, "data should have atleast 2 classes" );
 160     }
 161
 162     if(_labels_i.rows != _data_i.rows)
 163     {
 164         CV_Error( CV_StsBadArg, "number of rows in data and labels should be the equal" );
 165     }
 166
 167
 168     cv::Mat thetas = cv::Mat::zeros(num_classes, data_t.cols, CV_32F);
 169     cv::Mat init_theta = cv::Mat::zeros(data_t.cols, 1, CV_32F);
 170
 171     cv::Mat labels_l = remap_labels(_labels_i, this->forward_mapper);
 172     cv::Mat new_local_labels;
 173
 174     int ii=0;
 175     cv::Mat new_theta;
 176
 177     if(num_classes == 2)
 178     {
 179         labels_l.convertTo(labels, CV_32F);
 180         if(this->params.train_method == LogisticRegression::BATCH)
 181             new_theta = compute_batch_gradient(data_t, labels, init_theta);
 182         else
 183             new_theta = compute_mini_batch_gradient(data_t, labels, init_theta);
 184         thetas = new_theta.t();
 185     }
 186     else
 187     {
 188         /* take each class and rename classes you will get a theta per class
 189         as in multi class class scenario, we will have n thetas for n classes */
 190         ii = 0;
 191
 192         for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
 193         {
 194             new_local_labels = (labels_l == it->second)/255;
 195             new_local_labels.convertTo(labels, CV_32F);
 196             if(this->params.train_method == LogisticRegression::BATCH)
 197                 new_theta = compute_batch_gradient(data_t, labels, init_theta);
 198             else
 199                 new_theta = compute_mini_batch_gradient(data_t, labels, init_theta);
 200             hconcat(new_theta.t(), thetas.row(ii));
 201             ii += 1;
 202         }
 203     }
 204
 205     this->learnt_thetas = thetas.clone();
 206     if( cvIsNaN( (double)cv::sum(this->learnt_thetas)[0] ) )
 207     {
 208         CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
 209     }
 210     ok = true;
 211     return ok;
 212 }
 213
 214 float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, int) const
 215 {
 216     /* returns a class of the predicted class
 217     class names can be 1,2,3,4, .... etc */
 218     cv::Mat thetas, data, pred_labs;
 219     data = samples.getMat();
 220
 221     // check if learnt_mats array is populated
 222     if(this->learnt_thetas.total()<=0)
 223     {
 224         CV_Error( CV_StsBadArg, "classifier should be trained first" );
 225     }
 226     if(data.type() != CV_32F)
 227     {
 228         CV_Error( CV_StsBadArg, "data must be of floating type" );
 229     }
 230
 231     // add a column of ones
 232     cv::Mat data_t = cv::Mat::zeros(data.rows, data.cols+1, CV_32F);
 233     for (int i=0;i<data_t.cols;i++)
 234     {
 235         if(i==0)
 236         {
 237             vconcat(cv::Mat(data.rows, 1, data.type(), Scalar::all(1.0)), data_t.col(i));
 238             continue;
 239         }
 240         vconcat(data.col(i-1), data_t.col(i));
 241     }
 242
 243     this->learnt_thetas.convertTo(thetas, CV_32F);
 244
 245     CV_Assert(thetas.rows > 0);
 246
 247     double min_val;
 248     double max_val;
 249
 250     Point min_loc;
 251     Point max_loc;
 252
 253     cv::Mat labels;
 254     cv::Mat labels_c;
 255     cv::Mat temp_pred;
 256     cv::Mat pred_m = cv::Mat::zeros(data_t.rows, thetas.rows, data.type());
 257
 258     if(thetas.rows == 1)
 259     {
 260         temp_pred = calc_sigmoid(data_t*thetas.t());
 261         CV_Assert(temp_pred.cols==1);
 262
 263         // if greater than 0.5, predict class 0 or predict class 1
 264         temp_pred = (temp_pred>0.5)/255;
 265         temp_pred.convertTo(labels_c, CV_32S);
 266     }
 267     else
 268     {
 269         for(int i = 0;i<thetas.rows;i++)
 270         {
 271             temp_pred = calc_sigmoid(data_t * thetas.row(i).t());
 272             cv::vconcat(temp_pred, pred_m.col(i));
 273         }
 274         for(int i = 0;i<pred_m.rows;i++)
 275         {
 276             temp_pred = pred_m.row(i);
 277             minMaxLoc( temp_pred, &min_val, &max_val, &min_loc, &max_loc, Mat() );
 278             labels.push_back(max_loc.x);
 279         }
 280         labels.convertTo(labels_c, CV_32S);
 281     }
 282     pred_labs = remap_labels(labels_c, this->reverse_mapper);
 283     // convert pred_labs to integer type
 284     pred_labs.convertTo(pred_labs, CV_32S);
 285     pred_labs.copyTo(results);
 286     // TODO: determine
 287     return 0;
 288 }
 289
 290 cv::Mat LogisticRegressionImpl::calc_sigmoid(const cv::Mat& data) const
 291 {
 292     cv::Mat dest;
 293     cv::exp(-data, dest);
 294     return 1.0/(1.0+dest);
 295 }
 296
 297 double LogisticRegressionImpl::compute_cost(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta)
 298 {
 299     int llambda = 0;
 300     int m;
 301     int n;
 302     double cost = 0;
 303     double rparameter = 0;
 304     cv::Mat gradient;
 305     cv::Mat theta_b;
 306     cv::Mat theta_c;
 307     cv::Mat d_a;
 308     cv::Mat d_b;
 309
 310     m = _data.rows;
 311     n = _data.cols;
 312
 313     gradient = cv::Mat::zeros( _init_theta.rows, _init_theta.cols, _init_theta.type());
 314     theta_b = _init_theta(Range(1, n), Range::all());
 315     cv::multiply(theta_b, theta_b, theta_c, 1);
 316
 317     if(this->params.regularized > 0)
 318     {
 319         llambda = 1;
 320     }
 321
 322     if(this->params.norm == LogisticRegression::REG_L1)
 323     {
 324         rparameter = (llambda/(2*m)) * cv::sum(theta_b)[0];
 325     }
 326     else
 327     {
 328         // assuming it to be L2 by default
 329         rparameter = (llambda/(2*m)) * cv::sum(theta_c)[0];
 330     }
 331
 332     d_a = calc_sigmoid(_data* _init_theta);
 333
 334
 335     cv::log(d_a, d_a);
 336     cv::multiply(d_a, _labels, d_a);
 337
 338     d_b = 1 - calc_sigmoid(_data * _init_theta);
 339     cv::log(d_b, d_b);
 340     cv::multiply(d_b, 1-_labels, d_b);
 341
 342     cost = (-1.0/m) * (cv::sum(d_a)[0] + cv::sum(d_b)[0]);
 343     cost = cost + rparameter;
 344
 345     return cost;
 346 }
 347
 348 cv::Mat LogisticRegressionImpl::compute_batch_gradient(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta)
 349 {
 350     // implements batch gradient descent
 351     if(this->params.alpha<=0)
 352     {
 353         CV_Error( CV_StsBadArg, "check training parameters for the classifier" );
 354     }
 355
 356     if(this->params.num_iters <= 0)
 357     {
 358         CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
 359     }
 360
 361     int llambda = 0;
 362     double ccost;
 363     int m, n;
 364     cv::Mat pcal_a;
 365     cv::Mat pcal_b;
 366     cv::Mat pcal_ab;
 367     cv::Mat gradient;
 368     cv::Mat theta_p = _init_theta.clone();
 369     m = _data.rows;
 370     n = _data.cols;
 371
 372     if(this->params.regularized > 0)
 373     {
 374         llambda = 1;
 375     }
 376
 377     for(int i = 0;i<this->params.num_iters;i++)
 378     {
 379         ccost = compute_cost(_data, _labels, theta_p);
 380
 381         if( cvIsNaN( ccost ) )
 382         {
 383             CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
 384         }
 385
 386         pcal_b = calc_sigmoid((_data*theta_p) - _labels);
 387
 388         pcal_a = (static_cast<double>(1/m)) * _data.t();
 389
 390         gradient = pcal_a * pcal_b;
 391
 392         pcal_a = calc_sigmoid(_data*theta_p) - _labels;
 393
 394         pcal_b = _data(Range::all(), Range(0,1));
 395
 396         cv::multiply(pcal_a, pcal_b, pcal_ab, 1);
 397
 398         gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];
 399
 400         pcal_b = _data(Range::all(), Range(1,n));
 401
 402         //cout<<"for each training data entry"<<endl;
 403         for(int ii = 1;ii<gradient.rows;ii++)
 404         {
 405             pcal_b = _data(Range::all(), Range(ii,ii+1));
 406
 407             cv::multiply(pcal_a, pcal_b, pcal_ab, 1);
 408
 409             gradient.row(ii) = (1.0/m)*cv::sum(pcal_ab)[0] + (llambda/m) * theta_p.row(ii);
 410         }
 411
 412         theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;
 413     }
 414     return theta_p;
 415 }
 416
 417 cv::Mat LogisticRegressionImpl::compute_mini_batch_gradient(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta)
 418 {
 419     // implements batch gradient descent
 420     int lambda_l = 0;
 421     double ccost;
 422     int m, n;
 423     int j = 0;
 424     int size_b = this->params.mini_batch_size;
 425
 426     if(this->params.mini_batch_size <= 0 || this->params.alpha == 0)
 427     {
 428         CV_Error( CV_StsBadArg, "check training parameters for the classifier" );
 429     }
 430
 431     if(this->params.num_iters <= 0)
 432     {
 433         CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
 434     }
 435
 436     cv::Mat pcal_a;
 437     cv::Mat pcal_b;
 438     cv::Mat pcal_ab;
 439     cv::Mat gradient;
 440     cv::Mat theta_p = _init_theta.clone();
 441     cv::Mat data_d;
 442     cv::Mat labels_l;
 443
 444     if(this->params.regularized > 0)
 445     {
 446         lambda_l = 1;
 447     }
 448
 449     for(int i = 0;this->params.term_crit.maxCount;i++)
 450     {
 451         if(j+size_b<=_data.rows)
 452         {
 453             data_d = _data(Range(j,j+size_b), Range::all());
 454             labels_l = _labels(Range(j,j+size_b),Range::all());
 455         }
 456         else
 457         {
 458             data_d = _data(Range(j, _data.rows), Range::all());
 459             labels_l = _labels(Range(j, _labels.rows),Range::all());
 460         }
 461
 462         m = data_d.rows;
 463         n = data_d.cols;
 464
 465         ccost = compute_cost(data_d, labels_l, theta_p);
 466
 467         if( cvIsNaN( ccost ) == 1)
 468         {
 469             CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
 470         }
 471
 472         pcal_b = calc_sigmoid((data_d*theta_p) - labels_l);
 473
 474         pcal_a = (static_cast<double>(1/m)) * data_d.t();
 475
 476         gradient = pcal_a * pcal_b;
 477
 478         pcal_a = calc_sigmoid(data_d*theta_p) - labels_l;
 479
 480         pcal_b = data_d(Range::all(), Range(0,1));
 481
 482         cv::multiply(pcal_a, pcal_b, pcal_ab, 1);
 483
 484         gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];
 485
 486         pcal_b = data_d(Range::all(), Range(1,n));
 487
 488         for(int k = 1;k<gradient.rows;k++)
 489         {
 490             pcal_b = data_d(Range::all(), Range(k,k+1));
 491             cv::multiply(pcal_a, pcal_b, pcal_ab, 1);
 492             gradient.row(k) = (1.0/m)*cv::sum(pcal_ab)[0] + (lambda_l/m) * theta_p.row(k);
 493         }
 494
 495         theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;
 496
 497         j+=this->params.mini_batch_size;
 498
 499         if(j+size_b>_data.rows)
 500         {
 501             // if parsed through all data variables
 502             break;
 503         }
 504     }
 505     return theta_p;
 506 }
 507
 508 bool LogisticRegressionImpl::set_label_map(const cv::Mat &_labels_i)
 509 {
 510     // this function creates two maps to map user defined labels to program friendly labels two ways.
 511     int ii = 0;
 512     cv::Mat labels;
 513     bool ok = false;
 514
 515     this->labels_o = cv::Mat(0,1, CV_8U);
 516     this->labels_n = cv::Mat(0,1, CV_8U);
 517
 518     _labels_i.convertTo(labels, CV_32S);
 519
 520     for(int i = 0;i<labels.rows;i++)
 521     {
 522         this->forward_mapper[labels.at<int>(i)] += 1;
 523     }
 524
 525     for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
 526     {
 527         this->forward_mapper[it->first] = ii;
 528         this->labels_o.push_back(it->first);
 529         this->labels_n.push_back(ii);
 530         ii += 1;
 531     }
 532
 533     for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
 534     {
 535         this->reverse_mapper[it->second] = it->first;
 536     }
 537     ok = true;
 538
 539     return ok;
 540 }
 541
 542 cv::Mat LogisticRegressionImpl::remap_labels(const cv::Mat& _labels_i, const map<int, int>& lmap) const
 543 {
 544     cv::Mat labels;
 545     _labels_i.convertTo(labels, CV_32S);
 546
 547     cv::Mat new_labels = cv::Mat::zeros(labels.rows, labels.cols, labels.type());
 548
 549     CV_Assert( lmap.size() > 0 );
 550
 551     for(int i =0;i<labels.rows;i++)
 552     {
 553         new_labels.at<int>(i,0) = lmap.find(labels.at<int>(i,0))->second;
 554     }
 555     return new_labels;
 556 }
 557
 558 void LogisticRegressionImpl::clear()
 559 {
 560     this->learnt_thetas.release();
 561     this->labels_o.release();
 562     this->labels_n.release();
 563 }
 564
 565 void LogisticRegressionImpl::write(FileStorage& fs) const
 566 {
 567     // check if open
 568     if(fs.isOpened() == 0)
 569     {
 570         CV_Error(CV_StsBadArg,"file can't open. Check file path");
 571     }
 572     string desc = "Logisitic Regression Classifier";
 573     fs<<"classifier"<<desc.c_str();
 574     fs<<"alpha"<<this->params.alpha;
 575     fs<<"iterations"<<this->params.num_iters;
 576     fs<<"norm"<<this->params.norm;
 577     fs<<"regularized"<<this->params.regularized;
 578     fs<<"train_method"<<this->params.train_method;
 579     if(this->params.train_method == LogisticRegression::MINI_BATCH)
 580     {
 581         fs<<"mini_batch_size"<<this->params.mini_batch_size;
 582     }
 583     fs<<"learnt_thetas"<<this->learnt_thetas;
 584     fs<<"n_labels"<<this->labels_n;
 585     fs<<"o_labels"<<this->labels_o;
 586 }
 587
 588 void LogisticRegressionImpl::read(const FileNode& fn)
 589 {
 590     // check if empty
 591     if(fn.empty())
 592     {
 593         CV_Error( CV_StsBadArg, "empty FileNode object" );
 594     }
 595
 596     this->params.alpha = (double)fn["alpha"];
 597     this->params.num_iters = (int)fn["iterations"];
 598     this->params.norm = (int)fn["norm"];
 599     this->params.regularized = (int)fn["regularized"];
 600     this->params.train_method = (int)fn["train_method"];
 601
 602     if(this->params.train_method == LogisticRegression::MINI_BATCH)
 603     {
 604         this->params.mini_batch_size = (int)fn["mini_batch_size"];
 605     }
 606
 607     fn["learnt_thetas"] >> this->learnt_thetas;
 608     fn["o_labels"] >> this->labels_o;
 609     fn["n_labels"] >> this->labels_n;
 610
 611     for(int ii =0;ii<labels_o.rows;ii++)
 612     {
 613         this->forward_mapper[labels_o.at<int>(ii,0)] = labels_n.at<int>(ii,0);
 614         this->reverse_mapper[labels_n.at<int>(ii,0)] = labels_o.at<int>(ii,0);
 615     }
 616 }
 617
 618 cv::Mat LogisticRegressionImpl::get_learnt_thetas() const
 619 {
 620     return this->learnt_thetas;
 621 }
 622
 623 }
 624 }
 625
 626 /* End of file. */