Reworked ML logistic regression implementation, initial version

author Maksim Shabunin <maksim.shabunin@itseez.com>

Thu, 14 Aug 2014 15:01:45 +0000 (19:01 +0400)

committer Maksim Shabunin <maksim.shabunin@itseez.com>

Mon, 18 Aug 2014 15:06:58 +0000 (19:06 +0400)
author Maksim Shabunin <maksim.shabunin@itseez.com>
Thu, 14 Aug 2014 15:01:45 +0000 (19:01 +0400)
committer Maksim Shabunin <maksim.shabunin@itseez.com>
Mon, 18 Aug 2014 15:06:58 +0000 (19:06 +0400)
diff --git a/modules/ml/include/opencv2/ml.hpp b/modules/ml/include/opencv2/ml.hpp

index f6e3bf2..145eedb 100644 (file)
--- a/modules/ml/include/opencv2/ml.hpp
+++ b/modules/ml/include/opencv2/ml.hpp
@@ -571,56 +571,43 @@ public:
  /****************************************************************************************\
  *                           Logistic Regression                                          *
  \****************************************************************************************/
-struct CV_EXPORTS LogisticRegressionParams
-{
-    double alpha;
-    int num_iters;
-    int norm;
-    int regularized;
-    int train_method;
-    int mini_batch_size;
-    cv::TermCriteria term_crit;
-
-    LogisticRegressionParams();
-    LogisticRegressionParams(double learning_rate, int iters, int train_method, int normlization, int reg, int mini_batch_size);
-};
  
-class CV_EXPORTS LogisticRegression
+class CV_EXPORTS LogisticRegression : public StatModel
  {
  public:
-    LogisticRegression( const LogisticRegressionParams& params = LogisticRegressionParams());
-    LogisticRegression(cv::InputArray data_ip, cv::InputArray labels_ip, const LogisticRegressionParams& params);
-    virtual ~LogisticRegression();
+    class CV_EXPORTS Params
+    {
+    public:
+        Params(double learning_rate = 0.001,
+               int iters = 1000,
+               int method = LogisticRegression::BATCH,
+               int normlization = LogisticRegression::REG_L2,
+               int reg = 1,
+               int batch_size = 1);
+        double alpha;
+        int num_iters;
+        int norm;
+        int regularized;
+        int train_method;
+        int mini_batch_size;
+        cv::TermCriteria term_crit;
+    };
  
      enum { REG_L1 = 0, REG_L2 = 1};
      enum { BATCH = 0, MINI_BATCH = 1};
  
-    virtual bool train(cv::InputArray data_ip, cv::InputArray label_ip);
-    virtual void predict( cv::InputArray data, cv::OutputArray predicted_labels ) const;
-
-    virtual void write(FileStorage& fs) const;
-    virtual void read(const FileNode& fn);
+    // Algorithm interface
+    virtual void write( FileStorage &fs ) const = 0;
+    virtual void read( const FileNode &fn ) = 0;
  
-    const cv::Mat get_learnt_thetas() const;
-    virtual void clear();
-
-protected:
-
-    LogisticRegressionParams params;
-    cv::Mat learnt_thetas;
-    std::string default_model_name;
-    std::map<int, int> forward_mapper;
-    std::map<int, int> reverse_mapper;
+    // StatModel interface
+    virtual bool train( const Ptr<TrainData>& trainData, int flags=0 ) = 0;
+    virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;
+    virtual void clear() = 0;
  
-    cv::Mat labels_o;
-    cv::Mat labels_n;
+    virtual Mat get_learnt_thetas() const = 0;
  
-    static cv::Mat calc_sigmoid(const cv::Mat& data);
-    virtual double compute_cost(const cv::Mat& data, const cv::Mat& labels, const cv::Mat& init_theta);
-    virtual cv::Mat compute_batch_gradient(const cv::Mat& data, const cv::Mat& labels, const cv::Mat& init_theta);
-    virtual cv::Mat compute_mini_batch_gradient(const cv::Mat& data, const cv::Mat& labels, const cv::Mat& init_theta);
-    virtual bool set_label_map(const cv::Mat& labels);
-    static cv::Mat remap_labels(const cv::Mat& labels, const std::map<int, int>& lmap);
+    static Ptr<LogisticRegression> create( const Params& params = Params() );
  };
  
  /****************************************************************************************\
diff --git a/modules/ml/src/lr.cpp b/modules/ml/src/lr.cpp

index e09a505..2a08e04 100644 (file)
--- a/modules/ml/src/lr.cpp
+++ b/modules/ml/src/lr.cpp
@@ -55,55 +55,72 @@
  
  #include "precomp.hpp"
  
-
-using namespace cv;
-using namespace cv::ml;
  using namespace std;
  
-LogisticRegressionParams::LogisticRegressionParams()
-{
-    term_crit = cv::TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 1000, 0.001);
-    alpha = 0.001;
-    num_iters = 1000;
-    norm = LogisticRegression::REG_L2;
-    regularized = 1;
-    train_method = LogisticRegression::BATCH;
-    mini_batch_size = 1;
-}
-LogisticRegressionParams::LogisticRegressionParams( double learning_rate, int iters, int train_algo = LogisticRegression::BATCH, int normlization = LogisticRegression::REG_L2, int reg = 1, int mb_size = 5)
+namespace cv {
+namespace ml {
+
+LogisticRegression::Params::Params(double learning_rate,
+                                   int iters,
+                                   int method,
+                                   int normlization,
+                                   int reg,
+                                   int batch_size)
  {
-    term_crit = cv::TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, iters, learning_rate);
      alpha = learning_rate;
      num_iters = iters;
      norm = normlization;
      regularized = reg;
-    train_method = train_algo;
-    mini_batch_size = mb_size;
+    train_method = method;
+    mini_batch_size = batch_size;
+    term_crit = cv::TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, num_iters, alpha);
  }
  
-LogisticRegression::LogisticRegression(const LogisticRegressionParams& pms)
+class LogisticRegressionImpl : public LogisticRegression
  {
-    default_model_name = "my_lr";
-    this->params = pms;
-}
-
-LogisticRegression::LogisticRegression(cv::InputArray data, cv::InputArray labels, const LogisticRegressionParams& pms)
-{
-    default_model_name = "my_lr";
-    this->params = pms;
-    train(data, labels);
-}
-
-LogisticRegression::~LogisticRegression()
+public:
+    LogisticRegressionImpl(const Params& pms)
+        : params(pms)
+    {
+    }
+    virtual ~LogisticRegressionImpl() {}
+
+    virtual bool train( const Ptr<TrainData>& trainData, int=0 );
+    virtual float predict(InputArray samples, OutputArray results, int) const;
+    virtual void clear();
+    virtual void write(FileStorage& fs) const;
+    virtual void read(const FileNode& fn);
+    virtual cv::Mat get_learnt_thetas() const;
+    virtual int getVarCount() const { return learnt_thetas.cols; }
+    virtual bool isTrained() const { return !learnt_thetas.empty(); }
+    virtual bool isClassifier() const { return true; }
+    virtual String getDefaultModelName() const { return "opencv_ml_lr"; }
+protected:
+    cv::Mat calc_sigmoid(const cv::Mat& data) const;
+    double compute_cost(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta);
+    cv::Mat compute_batch_gradient(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta);
+    cv::Mat compute_mini_batch_gradient(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta);
+    bool set_label_map(const cv::Mat& _labels_i);
+    cv::Mat remap_labels(const cv::Mat& _labels_i, const map<int, int>& lmap) const;
+protected:
+    Params params;
+    cv::Mat learnt_thetas;
+    map<int, int> forward_mapper;
+    map<int, int> reverse_mapper;
+    cv::Mat labels_o;
+    cv::Mat labels_n;
+};
+
+Ptr<LogisticRegression> LogisticRegression::create(const Params& params)
  {
-    clear();
+    return makePtr<LogisticRegressionImpl>(params);
  }
  
-bool LogisticRegression::train(cv::InputArray data_ip, cv::InputArray labels_ip)
+bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
  {
      clear();
-    cv::Mat _data_i = data_ip.getMat();
-    cv::Mat _labels_i = labels_ip.getMat();
+    cv::Mat _data_i = trainData->getSamples();
+    cv::Mat _labels_i = trainData->getResponses();
  
      CV_Assert( !_labels_i.empty() && !_data_i.empty());
  
@@ -194,13 +211,12 @@ bool LogisticRegression::train(cv::InputArray data_ip, cv::InputArray labels_ip)
      return ok;
  }
  
-
-void LogisticRegression::predict( cv::InputArray _ip_data, cv::OutputArray _output_predicted_labels ) const
+float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, int) const
  {
      /* returns a class of the predicted class
      class names can be 1,2,3,4, .... etc */
      cv::Mat thetas, data, pred_labs;
-    data = _ip_data.getMat();
+    data = samples.getMat();
  
      // check if learnt_mats array is populated
      if(this->learnt_thetas.total()<=0)
@@ -266,19 +282,20 @@ void LogisticRegression::predict( cv::InputArray _ip_data, cv::OutputArray _outp
      pred_labs = remap_labels(labels_c, this->reverse_mapper);
      // convert pred_labs to integer type
      pred_labs.convertTo(pred_labs, CV_32S);
-    pred_labs.copyTo(_output_predicted_labels);
+    pred_labs.copyTo(results);
+    // TODO: determine a meaningful return value; 0 is a placeholder
+    return 0;
  }
  
-cv::Mat LogisticRegression::calc_sigmoid(const Mat& data)
+cv::Mat LogisticRegressionImpl::calc_sigmoid(const cv::Mat& data) const
  {
      cv::Mat dest;
      cv::exp(-data, dest);
      return 1.0/(1.0+dest);
  }
  
-double LogisticRegression::compute_cost(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta)
+double LogisticRegressionImpl::compute_cost(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta)
  {
-
      int llambda = 0;
      int m;
      int n;
@@ -328,7 +345,7 @@ double LogisticRegression::compute_cost(const cv::Mat& _data, const cv::Mat& _la
      return cost;
  }
  
-cv::Mat LogisticRegression::compute_batch_gradient(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta)
+cv::Mat LogisticRegressionImpl::compute_batch_gradient(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta)
  {
      // implements batch gradient descent
      if(this->params.alpha<=0)
@@ -397,7 +414,7 @@ cv::Mat LogisticRegression::compute_batch_gradient(const cv::Mat& _data, const c
      return theta_p;
  }
  
-cv::Mat LogisticRegression::compute_mini_batch_gradient(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta)
+cv::Mat LogisticRegressionImpl::compute_mini_batch_gradient(const cv::Mat& _data, const cv::Mat& _labels, const cv::Mat& _init_theta)
  {
      // implements batch gradient descent
      int lambda_l = 0;
@@ -488,7 +505,7 @@ cv::Mat LogisticRegression::compute_mini_batch_gradient(const cv::Mat& _data, co
      return theta_p;
  }
  
-bool LogisticRegression::set_label_map(const cv::Mat& _labels_i)
+bool LogisticRegressionImpl::set_label_map(const cv::Mat &_labels_i)
  {
      // this function creates two maps to map user defined labels to program friendly labels two ways.
      int ii = 0;
@@ -522,7 +539,7 @@ bool LogisticRegression::set_label_map(const cv::Mat& _labels_i)
      return ok;
  }
  
-cv::Mat LogisticRegression::remap_labels(const Mat& _labels_i, const std::map<int, int>& lmap)
+cv::Mat LogisticRegressionImpl::remap_labels(const cv::Mat& _labels_i, const map<int, int>& lmap) const
  {
      cv::Mat labels;
      _labels_i.convertTo(labels, CV_32S);
@@ -538,14 +555,14 @@ cv::Mat LogisticRegression::remap_labels(const Mat& _labels_i, const std::map<in
      return new_labels;
  }
  
-void LogisticRegression::clear()
+void LogisticRegressionImpl::clear()
  {
      this->learnt_thetas.release();
      this->labels_o.release();
      this->labels_n.release();
  }
  
-void LogisticRegression::write(FileStorage& fs) const
+void LogisticRegressionImpl::write(FileStorage& fs) const
  {
      // check if open
      if(fs.isOpened() == 0)
@@ -568,7 +585,7 @@ void LogisticRegression::write(FileStorage& fs) const
      fs<<"o_labels"<<this->labels_o;
  }
  
-void LogisticRegression::read(const FileNode& fn )
+void LogisticRegressionImpl::read(const FileNode& fn)
  {
      // check if empty
      if(fn.empty())
@@ -598,8 +615,12 @@ void LogisticRegression::read(const FileNode& fn )
      }
  }
  
-const cv::Mat LogisticRegression::get_learnt_thetas() const
+cv::Mat LogisticRegressionImpl::get_learnt_thetas() const
  {
      return this->learnt_thetas;
  }
+
+}
+}
+
  /* End of file. */
diff --git a/modules/ml/test/test_lr.cpp b/modules/ml/test/test_lr.cpp

index 90ee7b8..a5f1306 100644 (file)
--- a/modules/ml/test/test_lr.cpp
+++ b/modules/ml/test/test_lr.cpp
@@ -92,78 +92,29 @@ protected:
  void CV_LRTest::run( int /*start_from*/ )
  {
      // initialize varibles from the popular Iris Dataset
-    Mat data = (Mat_<double>(150, 4)<<
-        5.1,3.5,1.4,0.2, 4.9,3.0,1.4,0.2, 4.7,3.2,1.3,0.2, 4.6,3.1,1.5,0.2,
-        5.0,3.6,1.4,0.2, 5.4,3.9,1.7,0.4, 4.6,3.4,1.4,0.3, 5.0,3.4,1.5,0.2,
-        4.4,2.9,1.4,0.2, 4.9,3.1,1.5,0.1, 5.4,3.7,1.5,0.2, 4.8,3.4,1.6,0.2,
-        4.8,3.0,1.4,0.1, 4.3,3.0,1.1,0.1, 5.8,4.0,1.2,0.2, 5.7,4.4,1.5,0.4,
-        5.4,3.9,1.3,0.4, 5.1,3.5,1.4,0.3, 5.7,3.8,1.7,0.3, 5.1,3.8,1.5,0.3,
-        5.4,3.4,1.7,0.2, 5.1,3.7,1.5,0.4, 4.6,3.6,1.0,0.2, 5.1,3.3,1.7,0.5,
-        4.8,3.4,1.9,0.2, 5.0,3.0,1.6,0.2, 5.0,3.4,1.6,0.4, 5.2,3.5,1.5,0.2,
-        5.2,3.4,1.4,0.2, 4.7,3.2,1.6,0.2, 4.8,3.1,1.6,0.2, 5.4,3.4,1.5,0.4,
-        5.2,4.1,1.5,0.1, 5.5,4.2,1.4,0.2, 4.9,3.1,1.5,0.1, 5.0,3.2,1.2,0.2,
-        5.5,3.5,1.3,0.2, 4.9,3.1,1.5,0.1, 4.4,3.0,1.3,0.2, 5.1,3.4,1.5,0.2,
-        5.0,3.5,1.3,0.3, 4.5,2.3,1.3,0.3, 4.4,3.2,1.3,0.2, 5.0,3.5,1.6,0.6,
-        5.1,3.8,1.9,0.4, 4.8,3.0,1.4,0.3, 5.1,3.8,1.6,0.2, 4.6,3.2,1.4,0.2,
-        5.3,3.7,1.5,0.2, 5.0,3.3,1.4,0.2, 7.0,3.2,4.7,1.4, 6.4,3.2,4.5,1.5,
-        6.9,3.1,4.9,1.5, 5.5,2.3,4.0,1.3, 6.5,2.8,4.6,1.5, 5.7,2.8,4.5,1.3,
-        6.3,3.3,4.7,1.6, 4.9,2.4,3.3,1.0, 6.6,2.9,4.6,1.3, 5.2,2.7,3.9,1.4,
-        5.0,2.0,3.5,1.0, 5.9,3.0,4.2,1.5, 6.0,2.2,4.0,1.0, 6.1,2.9,4.7,1.4,
-        5.6,2.9,3.6,1.3, 6.7,3.1,4.4,1.4, 5.6,3.0,4.5,1.5, 5.8,2.7,4.1,1.0,
-        6.2,2.2,4.5,1.5, 5.6,2.5,3.9,1.1, 5.9,3.2,4.8,1.8, 6.1,2.8,4.0,1.3,
-        6.3,2.5,4.9,1.5, 6.1,2.8,4.7,1.2, 6.4,2.9,4.3,1.3, 6.6,3.0,4.4,1.4,
-        6.8,2.8,4.8,1.4, 6.7,3.0,5.0,1.7, 6.0,2.9,4.5,1.5, 5.7,2.6,3.5,1.0,
-        5.5,2.4,3.8,1.1, 5.5,2.4,3.7,1.0, 5.8,2.7,3.9,1.2, 6.0,2.7,5.1,1.6,
-        5.4,3.0,4.5,1.5, 6.0,3.4,4.5,1.6, 6.7,3.1,4.7,1.5, 6.3,2.3,4.4,1.3,
-        5.6,3.0,4.1,1.3, 5.5,2.5,4.0,1.3, 5.5,2.6,4.4,1.2, 6.1,3.0,4.6,1.4,
-        5.8,2.6,4.0,1.2, 5.0,2.3,3.3,1.0, 5.6,2.7,4.2,1.3, 5.7,3.0,4.2,1.2,
-        5.7,2.9,4.2,1.3, 6.2,2.9,4.3,1.3, 5.1,2.5,3.0,1.1, 5.7,2.8,4.1,1.3,
-        6.3,3.3,6.0,2.5, 5.8,2.7,5.1,1.9, 7.1,3.0,5.9,2.1, 6.3,2.9,5.6,1.8,
-        6.5,3.0,5.8,2.2, 7.6,3.0,6.6,2.1, 4.9,2.5,4.5,1.7, 7.3,2.9,6.3,1.8,
-        6.7,2.5,5.8,1.8, 7.2,3.6,6.1,2.5, 6.5,3.2,5.1,2.0, 6.4,2.7,5.3,1.9,
-        6.8,3.0,5.5,2.1, 5.7,2.5,5.0,2.0, 5.8,2.8,5.1,2.4, 6.4,3.2,5.3,2.3,
-        6.5,3.0,5.5,1.8, 7.7,3.8,6.7,2.2, 7.7,2.6,6.9,2.3, 6.0,2.2,5.0,1.5,
-        6.9,3.2,5.7,2.3, 5.6,2.8,4.9,2.0, 7.7,2.8,6.7,2.0, 6.3,2.7,4.9,1.8,
-        6.7,3.3,5.7,2.1, 7.2,3.2,6.0,1.8, 6.2,2.8,4.8,1.8, 6.1,3.0,4.9,1.8,
-        6.4,2.8,5.6,2.1, 7.2,3.0,5.8,1.6, 7.4,2.8,6.1,1.9, 7.9,3.8,6.4,2.0,
-        6.4,2.8,5.6,2.2, 6.3,2.8,5.1,1.5, 6.1,2.6,5.6,1.4, 7.7,3.0,6.1,2.3,
-        6.3,3.4,5.6,2.4, 6.4,3.1,5.5,1.8, 6.0,3.0,4.8,1.8, 6.9,3.1,5.4,2.1,
-        6.7,3.1,5.6,2.4, 6.9,3.1,5.1,2.3, 5.8,2.7,5.1,1.9, 6.8,3.2,5.9,2.3,
-        6.7,3.3,5.7,2.5, 6.7,3.0,5.2,2.3, 6.3,2.5,5.0,1.9, 6.5,3.0,5.2,2.0,
-        6.2,3.4,5.4,2.3, 5.9,3.0,5.1,1.8);
-
-    Mat labels = (Mat_<int>(150, 1)<< 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-        2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-          3, 3, 3, 3, 3);
+    string dataFileName = ts->get_data_path() + "iris.data";
+    Ptr<TrainData> tdata = TrainData::loadFromCSV(dataFileName, 0);
  
-    Mat responses1, responses2;
-    float error = 0.0f;
-
-    LogisticRegressionParams params1 = LogisticRegressionParams();
-
-    params1.alpha = 1.0;
-    params1.num_iters = 10001;
-    params1.norm = LogisticRegression::REG_L2;
-    params1.regularized = 1;
-    params1.train_method = LogisticRegression::BATCH;
-    params1.mini_batch_size = 10;
+    LogisticRegression::Params params = LogisticRegression::Params();
+    params.alpha = 1.0;
+    params.num_iters = 10001;
+    params.norm = LogisticRegression::REG_L2;
+    params.regularized = 1;
+    params.train_method = LogisticRegression::BATCH;
+    params.mini_batch_size = 10;
  
      // run LR classifier train classifier
-    data.convertTo(data, CV_32FC1);
-    labels.convertTo(labels, CV_32FC1);
-    LogisticRegression lr1(data, labels, params1);
+    Ptr<LogisticRegression> p = LogisticRegression::create(params);
+    p->train(tdata);
  
      // predict using the same data
-    lr1.predict(data, responses1);
-
-    int test_code = cvtest::TS::OK;
+    Mat responses;
+    p->predict(tdata->getSamples(), responses);
  
      // calculate error
-    if(!calculateError(responses1, labels, error))
+    int test_code = cvtest::TS::OK;
+    float error = 0.0f;
+    if(!calculateError(responses, tdata->getResponses(), error))
      {
          ts->printf(cvtest::TS::LOG, "Bad prediction labels\n" );
          test_code = cvtest::TS::FAIL_INVALID_OUTPUT;
@@ -174,6 +125,14 @@ void CV_LRTest::run( int /*start_from*/ )
          test_code = cvtest::TS::FAIL_BAD_ACCURACY;
      }
  
+    {
+        FileStorage s("debug.xml", FileStorage::WRITE);
+        s << "original" << tdata->getResponses();
+        s << "predicted1" << responses;
+        s << "learnt" << p->get_learnt_thetas();
+        s << "error" << error;
+        s.release();
+    }
      ts->set_failed_test_info(test_code);
  }
  
@@ -189,69 +148,16 @@ protected:
  
  void CV_LRTest_SaveLoad::run( int /*start_from*/ )
  {
-
      int code = cvtest::TS::OK;
  
      // initialize varibles from the popular Iris Dataset
-    Mat data = (Mat_<double>(150, 4)<<
-        5.1,3.5,1.4,0.2, 4.9,3.0,1.4,0.2, 4.7,3.2,1.3,0.2, 4.6,3.1,1.5,0.2,
-        5.0,3.6,1.4,0.2, 5.4,3.9,1.7,0.4, 4.6,3.4,1.4,0.3, 5.0,3.4,1.5,0.2,
-        4.4,2.9,1.4,0.2, 4.9,3.1,1.5,0.1, 5.4,3.7,1.5,0.2, 4.8,3.4,1.6,0.2,
-        4.8,3.0,1.4,0.1, 4.3,3.0,1.1,0.1, 5.8,4.0,1.2,0.2, 5.7,4.4,1.5,0.4,
-        5.4,3.9,1.3,0.4, 5.1,3.5,1.4,0.3, 5.7,3.8,1.7,0.3, 5.1,3.8,1.5,0.3,
-        5.4,3.4,1.7,0.2, 5.1,3.7,1.5,0.4, 4.6,3.6,1.0,0.2, 5.1,3.3,1.7,0.5,
-        4.8,3.4,1.9,0.2, 5.0,3.0,1.6,0.2, 5.0,3.4,1.6,0.4, 5.2,3.5,1.5,0.2,
-        5.2,3.4,1.4,0.2, 4.7,3.2,1.6,0.2, 4.8,3.1,1.6,0.2, 5.4,3.4,1.5,0.4,
-        5.2,4.1,1.5,0.1, 5.5,4.2,1.4,0.2, 4.9,3.1,1.5,0.1, 5.0,3.2,1.2,0.2,
-        5.5,3.5,1.3,0.2, 4.9,3.1,1.5,0.1, 4.4,3.0,1.3,0.2, 5.1,3.4,1.5,0.2,
-        5.0,3.5,1.3,0.3, 4.5,2.3,1.3,0.3, 4.4,3.2,1.3,0.2, 5.0,3.5,1.6,0.6,
-        5.1,3.8,1.9,0.4, 4.8,3.0,1.4,0.3, 5.1,3.8,1.6,0.2, 4.6,3.2,1.4,0.2,
-        5.3,3.7,1.5,0.2, 5.0,3.3,1.4,0.2, 7.0,3.2,4.7,1.4, 6.4,3.2,4.5,1.5,
-        6.9,3.1,4.9,1.5, 5.5,2.3,4.0,1.3, 6.5,2.8,4.6,1.5, 5.7,2.8,4.5,1.3,
-        6.3,3.3,4.7,1.6, 4.9,2.4,3.3,1.0, 6.6,2.9,4.6,1.3, 5.2,2.7,3.9,1.4,
-        5.0,2.0,3.5,1.0, 5.9,3.0,4.2,1.5, 6.0,2.2,4.0,1.0, 6.1,2.9,4.7,1.4,
-        5.6,2.9,3.6,1.3, 6.7,3.1,4.4,1.4, 5.6,3.0,4.5,1.5, 5.8,2.7,4.1,1.0,
-        6.2,2.2,4.5,1.5, 5.6,2.5,3.9,1.1, 5.9,3.2,4.8,1.8, 6.1,2.8,4.0,1.3,
-        6.3,2.5,4.9,1.5, 6.1,2.8,4.7,1.2, 6.4,2.9,4.3,1.3, 6.6,3.0,4.4,1.4,
-        6.8,2.8,4.8,1.4, 6.7,3.0,5.0,1.7, 6.0,2.9,4.5,1.5, 5.7,2.6,3.5,1.0,
-        5.5,2.4,3.8,1.1, 5.5,2.4,3.7,1.0, 5.8,2.7,3.9,1.2, 6.0,2.7,5.1,1.6,
-        5.4,3.0,4.5,1.5, 6.0,3.4,4.5,1.6, 6.7,3.1,4.7,1.5, 6.3,2.3,4.4,1.3,
-        5.6,3.0,4.1,1.3, 5.5,2.5,4.0,1.3, 5.5,2.6,4.4,1.2, 6.1,3.0,4.6,1.4,
-        5.8,2.6,4.0,1.2, 5.0,2.3,3.3,1.0, 5.6,2.7,4.2,1.3, 5.7,3.0,4.2,1.2,
-        5.7,2.9,4.2,1.3, 6.2,2.9,4.3,1.3, 5.1,2.5,3.0,1.1, 5.7,2.8,4.1,1.3,
-        6.3,3.3,6.0,2.5, 5.8,2.7,5.1,1.9, 7.1,3.0,5.9,2.1, 6.3,2.9,5.6,1.8,
-        6.5,3.0,5.8,2.2, 7.6,3.0,6.6,2.1, 4.9,2.5,4.5,1.7, 7.3,2.9,6.3,1.8,
-        6.7,2.5,5.8,1.8, 7.2,3.6,6.1,2.5, 6.5,3.2,5.1,2.0, 6.4,2.7,5.3,1.9,
-        6.8,3.0,5.5,2.1, 5.7,2.5,5.0,2.0, 5.8,2.8,5.1,2.4, 6.4,3.2,5.3,2.3,
-        6.5,3.0,5.5,1.8, 7.7,3.8,6.7,2.2, 7.7,2.6,6.9,2.3, 6.0,2.2,5.0,1.5,
-        6.9,3.2,5.7,2.3, 5.6,2.8,4.9,2.0, 7.7,2.8,6.7,2.0, 6.3,2.7,4.9,1.8,
-        6.7,3.3,5.7,2.1, 7.2,3.2,6.0,1.8, 6.2,2.8,4.8,1.8, 6.1,3.0,4.9,1.8,
-        6.4,2.8,5.6,2.1, 7.2,3.0,5.8,1.6, 7.4,2.8,6.1,1.9, 7.9,3.8,6.4,2.0,
-        6.4,2.8,5.6,2.2, 6.3,2.8,5.1,1.5, 6.1,2.6,5.6,1.4, 7.7,3.0,6.1,2.3,
-        6.3,3.4,5.6,2.4, 6.4,3.1,5.5,1.8, 6.0,3.0,4.8,1.8, 6.9,3.1,5.4,2.1,
-        6.7,3.1,5.6,2.4, 6.9,3.1,5.1,2.3, 5.8,2.7,5.1,1.9, 6.8,3.2,5.9,2.3,
-        6.7,3.3,5.7,2.5, 6.7,3.0,5.2,2.3, 6.3,2.5,5.0,1.9, 6.5,3.0,5.2,2.0,
-        6.2,3.4,5.4,2.3, 5.9,3.0,5.1,1.8);
-
-    Mat labels = (Mat_<int>(150, 1)<< 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-      1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-        2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-          3, 3, 3, 3, 3);
-
-    // LogisticRegressionParams params = LogisticRegressionParams();
+    string dataFileName = ts->get_data_path() + "iris.data";
+    Ptr<TrainData> tdata = TrainData::loadFromCSV(dataFileName, 0);
  
      Mat responses1, responses2;
      Mat learnt_mat1, learnt_mat2;
-    Mat pred_result1, comp_learnt_mats;
-
-    float errorCount = 0.0;
-
-    LogisticRegressionParams params1 = LogisticRegressionParams();
-    LogisticRegressionParams params2 = LogisticRegressionParams();
  
+    LogisticRegression::Params params1 = LogisticRegression::Params();
      params1.alpha = 1.0;
      params1.num_iters = 10001;
      params1.norm = LogisticRegression::REG_L2;
@@ -259,56 +165,40 @@ void CV_LRTest_SaveLoad::run( int /*start_from*/ )
      params1.train_method = LogisticRegression::BATCH;
      params1.mini_batch_size = 10;
  
-    data.convertTo(data, CV_32FC1);
-    labels.convertTo(labels, CV_32FC1);
-
-    // run LR classifier train classifier
-    LogisticRegression lr1(data, labels, params1);
-    LogisticRegression lr2(params2);
-    learnt_mat1 = lr1.get_learnt_thetas();
-
-    lr1.predict(data, responses1);
-    // now save the classifier
-
-    string filename = cv::tempfile(".xml");
+    // train and save the classifier
+    String filename = cv::tempfile(".xml");
      try
      {
-      //lr1.save(filename.c_str());
-      FileStorage fs;
-      fs.open(filename.c_str(),FileStorage::WRITE);
-      lr1.write(fs);
-      fs.release();
+        // create and train the LR classifier, then predict on the training samples
+        Ptr<LogisticRegression> lr1 = LogisticRegression::create(params1);
+        lr1->train(tdata);
+        lr1->predict(tdata->getSamples(), responses1);
+        learnt_mat1 = lr1->get_learnt_thetas();
+        lr1->save(filename);
      }
-
      catch(...)
      {
          ts->printf(cvtest::TS::LOG, "Crash in write method.\n" );
          ts->set_failed_test_info(cvtest::TS::FAIL_EXCEPTION);
      }
  
+    // then load the saved model into a second classifier instance
      try
      {
-      //lr2.load(filename.c_str());
-      FileStorage fs;
-      fs.open(filename.c_str(),FileStorage::READ);
-      FileNode fn = fs.root();
-      lr2.read(fn);
-      fs.release();
+        Ptr<LogisticRegression> lr2 = StatModel::load<LogisticRegression>(filename);
+        lr2->predict(tdata->getSamples(), responses2);
+        learnt_mat2 = lr2->get_learnt_thetas();
      }
-
      catch(...)
      {
-        ts->printf(cvtest::TS::LOG, "Crash in read method.\n");
+        ts->printf(cvtest::TS::LOG, "Crash in write method.\n" );
          ts->set_failed_test_info(cvtest::TS::FAIL_EXCEPTION);
      }
  
-    lr2.predict(data, responses2);
-
-    learnt_mat2 = lr2.get_learnt_thetas();
-
      CV_Assert(responses1.rows == responses2.rows);
  
      // compare difference in learnt matrices before and after loading from disk
+    Mat comp_learnt_mats;
      comp_learnt_mats = (learnt_mat1 == learnt_mat2);
      comp_learnt_mats = comp_learnt_mats.reshape(1, comp_learnt_mats.rows*comp_learnt_mats.cols);
      comp_learnt_mats.convertTo(comp_learnt_mats, CV_32S);
@@ -317,6 +207,7 @@ void CV_LRTest_SaveLoad::run( int /*start_from*/ )
      // compare difference in prediction outputs and stored inputs
      // check if there is any difference between computed learnt mat and retreived mat
  
+    float errorCount = 0.0;
      errorCount += 1 - (float)cv::countNonZero(responses1 == responses2)/responses1.rows;
      errorCount += 1 - (float)cv::sum(comp_learnt_mats)[0]/comp_learnt_mats.rows;
  
diff --git a/samples/cpp/logistic_regression.cpp b/samples/cpp/logistic_regression.cpp

index 2ef41c0..e97f602 100644 (file)
--- a/samples/cpp/logistic_regression.cpp
+++ b/samples/cpp/logistic_regression.cpp
@@ -1,4 +1,4 @@
-///////////////////////////////////////////////////////////////////////////////////////
+/*//////////////////////////////////////////////////////////////////////////////////////
  // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  
  //  By downloading, copying, installing or using the software you agree to this license.
@@ -11,7 +11,8 @@
  // Rahul Kavi rahulkavi[at]live[at]com
  //
  
-// contains a subset of data from the popular Iris Dataset (taken from "http://archive.ics.uci.edu/ml/datasets/Iris")
+// contains a subset of data from the popular Iris Dataset (taken from
+// "http://archive.ics.uci.edu/ml/datasets/Iris")
  
  // # You are free to use, change, or redistribute the code in any way you wish for
  // # non-commercial purposes, but please maintain the name of the original author.
@@ -24,7 +25,6 @@
  
  // # Logistic Regression ALGORITHM
  
-
  //                           License Agreement
  //                For Open Source Computer Vision Library
  
@@ -54,7 +54,7 @@
  // loss of use, data, or profits; or business interruption) however caused
  // and on any theory of liability, whether in contract, strict liability,
  // or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
+// the use of this software, even if advised of the possibility of such damage.*/
  
  #include <iostream>
  
@@ -62,42 +62,45 @@
  #include <opencv2/ml/ml.hpp>
  #include <opencv2/highgui/highgui.hpp>
  
-
  using namespace std;
  using namespace cv;
  using namespace cv::ml;
  
  int main()
  {
-    Mat data_temp, labels_temp;
+    const String filename = "data01.xml";
+    cout << "**********************************************************************" << endl;
+    cout << filename
+         << " contains digits 0 and 1 of 20 samples each, collected on an Android device" << endl;
+    cout << "Each of the collected images are of size 28 x 28 re-arranged to 1 x 784 matrix"
+         << endl;
+    cout << "**********************************************************************" << endl;
+
      Mat data, labels;
+    {
+        cout << "loading the dataset" << endl;
+        FileStorage f;
+        if(f.open(filename, FileStorage::READ))
+        {
+            f["datamat"] >> data;
+            f["labelsmat"] >> labels;
+            f.release();
+        }
+        else
+        {
+            cerr << "File can not be opened: " << filename << endl;
+            return 1;
+        }
+        data.convertTo(data, CV_32F);
+        labels.convertTo(labels, CV_32F);
+        cout << "read " << data.rows << " rows of data" << endl;
+    }
  
      Mat data_train, data_test;
      Mat labels_train, labels_test;
-
-    Mat responses, result;
-    FileStorage fs1, fs2;
-
-    FileStorage f;
-
-    cout<<"*****************************************************************************************"<<endl;
-    cout<<"\"data01.xml\" contains digits 0 and 1 of 20 samples each, collected on an Android device"<<endl;
-    cout<<"Each of the collected images are of size 28 x 28 re-arranged to 1 x 784 matrix"<<endl;
-    cout<<"*****************************************************************************************\n\n"<<endl;
-
-    cout<<"loading the dataset\n"<<endl;
-
-    f.open("data01.xml", FileStorage::READ);
-
-    f["datamat"] >> data_temp;
-    f["labelsmat"] >> labels_temp;
-
-    data_temp.convertTo(data, CV_32F);
-    labels_temp.convertTo(labels, CV_32F);
-
-    for(int i =0;i<data.rows;i++)
+    for(int i = 0; i < data.rows; i++)
      {
-        if(i%2 ==0)
+        if(i % 2 == 0)
          {
              data_train.push_back(data.row(i));
              labels_train.push_back(labels.row(i));
@@ -108,66 +111,66 @@ int main()
              labels_test.push_back(labels.row(i));
          }
      }
-
-    cout<<"training samples per class: "<<data_train.rows/2<<endl;
-    cout<<"testing samples per class: "<<data_test.rows/2<<endl;
+    cout << "training/testing samples count: " << data_train.rows << "/" << data_test.rows << endl;
  
      // display sample image
-    Mat img_disp1 = data_train.row(2).reshape(0,28).t();
-    Mat img_disp2 = data_train.row(18).reshape(0,28).t();
-
-    imshow("digit 0", img_disp1);
-    imshow("digit 1", img_disp2);
-
-    cout<<"initializing Logisitc Regression Parameters\n"<<endl;
-
-    // LogisticRegressionParams params1 = LogisticRegressionParams(0.001, 10, LogisticRegression::BATCH, LogisticRegression::REG_L2, 1, 1);
-    // params1 (above) with batch gradient performs better than mini batch gradient below with same parameters
-    LogisticRegressionParams params1 = LogisticRegressionParams(0.001, 10, LogisticRegression::MINI_BATCH, LogisticRegression::REG_L2, 1, 1);
+//    Mat bigImage;
+//    for(int i = 0; i < data_train.rows; ++i)
+//    {
+//        bigImage.push_back(data_train.row(i).reshape(0, 28));
+//    }
+//    imshow("digits", bigImage.t());
  
-    // however mini batch gradient descent parameters with slower learning rate(below) can be used to get higher accuracy than with parameters mentioned above
-    // LogisticRegressionParams params1 = LogisticRegressionParams(0.000001, 10, LogisticRegression::MINI_BATCH, LogisticRegression::REG_L2, 1, 1);
-
-    cout<<"training Logisitc Regression classifier\n"<<endl;
+    Mat responses, result;
  
-    LogisticRegression lr1(data_train, labels_train, params1);
-    lr1.predict(data_test, responses);
+//    LogisticRegression::Params params = LogisticRegression::Params(
+//        0.001, 10, LogisticRegression::BATCH, LogisticRegression::REG_L2, 1, 1);
+    // the commented-out batch gradient params (above) perform better than the
+    // mini batch gradient params below with the same parameters
+    LogisticRegression::Params params = LogisticRegression::Params(
+        0.001, 10, LogisticRegression::MINI_BATCH, LogisticRegression::REG_L2, 1, 1);
+
+    // however mini batch gradient descent parameters with slower learning
+    // rate(below) can be used to get higher accuracy than with parameters
+    // mentioned above
+//    LogisticRegression::Params params = LogisticRegression::Params(
+//        0.000001, 10, LogisticRegression::MINI_BATCH, LogisticRegression::REG_L2, 1, 1);
+
+    cout << "training...";
+    Ptr<StatModel> lr1 = LogisticRegression::create(params);
+    lr1->train(data_train, ROW_SAMPLE, labels_train);
+    cout << "done!" << endl;
+
+    cout << "predicting...";
+    lr1->predict(data_test, responses);
+    cout << "done!" << endl;
+
+    // show prediction report
+    cout << "original vs predicted:" << endl;
      labels_test.convertTo(labels_test, CV_32S);
-
-    cout<<"Original Label ::  Predicted Label"<<endl;
-    result = (labels_test == responses)/255;
-
-    for(int i=0;i<labels_test.rows;i++)
-    {
-        cout<<labels_test.at<int>(i,0)<<" :: "<< responses.at<int>(i,0)<<endl;
-    }
-
-    // calculate accuracy
-    cout<<"accuracy: "<<((double)cv::sum(result)[0]/result.rows)*100<<"%\n";
-    cout<<"saving the classifier"<<endl;
+    cout << labels_test.t() << endl;
+    cout << responses.t() << endl;
+    result = (labels_test == responses) / 255;
+    cout << "accuracy: " << ((double)cv::sum(result)[0] / result.rows) * 100 << "%\n";
  
      // save the classfier
-    fs1.open("NewLR_Trained.xml",FileStorage::WRITE);
-    lr1.write(fs1);
-    fs1.release();
+    cout << "saving the classifier" << endl;
+    const String saveFilename = "NewLR_Trained.xml";
+    lr1->save(saveFilename);
  
      // load the classifier onto new object
-    LogisticRegressionParams params2 = LogisticRegressionParams();
-    LogisticRegression lr2(params2);
-    cout<<"loading a new classifier"<<endl;
-    fs2.open("NewLR_Trained.xml",FileStorage::READ);
-    FileNode fn2 = fs2.root();
-    lr2.read(fn2);
-    fs2.release();
-
-    Mat responses2;
+    cout << "loading a new classifier" << endl;
+    Ptr<LogisticRegression> lr2 = StatModel::load<LogisticRegression>(saveFilename);
  
      // predict using loaded classifier
-    cout<<"predicting the dataset using the loaded classfier\n"<<endl;
-    lr2.predict(data_test, responses2);
+    cout << "predicting the dataset using the loaded classfier" << endl;
+    Mat responses2;
+    lr2->predict(data_test, responses2);
      // calculate accuracy
-    cout<<"accuracy using loaded classifier: "<<100 * (float)cv::countNonZero(labels_test == responses2)/responses2.rows<<"%"<<endl;
-    waitKey(0);
+    cout << "accuracy using loaded classifier: "
+         << 100 * (float)cv::countNonZero(labels_test == responses2) / responses2.rows << "%"
+         << endl;
  
+    waitKey(0);
      return 0;
  }
author	Maksim Shabunin <maksim.shabunin@itseez.com>
	Thu, 14 Aug 2014 15:01:45 +0000 (19:01 +0400)
committer	Maksim Shabunin <maksim.shabunin@itseez.com>
	Mon, 18 Aug 2014 15:06:58 +0000 (19:06 +0400)
modules/ml/include/opencv2/ml.hpp		patch \| blob \| history
modules/ml/src/lr.cpp		patch \| blob \| history
modules/ml/test/test_lr.cpp		patch \| blob \| history
samples/cpp/logistic_regression.cpp		patch \| blob \| history