// Parameters describing how samples are divided into a train part and a test part.
// This patch renames the constructor parameters (dropping the leading underscore)
// and removes the class_part / class_part_mode members; train_sample_part,
// train_sample_part_mode and mix remain.
struct CV_EXPORTS CvTrainTestSplit
{
-public:
CvTrainTestSplit();
- CvTrainTestSplit( int _train_sample_count, bool _mix = true);
- CvTrainTestSplit( float _train_sample_portion, bool _mix = true);
+ CvTrainTestSplit( int train_sample_count, bool mix = true);
+ CvTrainTestSplit( float train_sample_portion, bool mix = true);
// NOTE(review): the union's members are not visible in this excerpt; the
// header-less bodies further down assign train_sample_part.count and
// train_sample_part.portion.
union
{
} train_sample_part;
// Assigned CV_COUNT or CV_PORTION by the header-less bodies further down in this excerpt.
int train_sample_part_mode;
- union
- {
- int *count;
- float *portion;
- } *class_part;
- int class_part_mode;
-
- bool mix;
// NOTE(review): presumably controls whether samples are shuffled before splitting — confirm.
+ bool mix;
};
class CV_EXPORTS CvMLData
// 1 - file cannot be opened or is not correct
int read_csv( const char* filename );
- const CvMat* get_values();
+ const CvMat* get_values() const;
const CvMat* get_responses();
- const CvMat* get_missing();
+ const CvMat* get_missing() const;
void set_response_idx( int idx ); // old response become predictors, new response_idx = idx
// if idx < 0 there will be no response
- int get_response_idx();
+ int get_response_idx() const;
- const CvMat* get_train_sample_idx();
- const CvMat* get_test_sample_idx();
- void mix_train_and_test_idx();
void set_train_test_split( const CvTrainTestSplit * spl );
+ const CvMat* get_train_sample_idx() const;
+ const CvMat* get_test_sample_idx() const;
+ void mix_train_and_test_idx();
const CvMat* get_var_idx();
void chahge_var_idx( int vi, bool state ); // state == true to set vi-variable as predictor
const CvMat* get_var_types();
- int get_var_type( int var_idx );
+ int get_var_type( int var_idx ) const;
// The following 2 methods allow changing variable types.
// Use these methods to assign the CV_VAR_CATEGORICAL type to a categorical variable
// with numerical labels; in all other cases variable types are correctly determined automatically.
void change_var_type( int var_idx, int type); // type in { CV_VAR_ORDERED, CV_VAR_CATEGORICAL }
void set_delimiter( char ch );
- char get_delimiter();
+ char get_delimiter() const;
void set_miss_ch( char ch );
- char get_miss_ch();
+ char get_miss_ch() const;
+ const std::map<std::string, int>& get_class_labels_map() const;
+
protected:
virtual void clear();
bool mix;
int total_class_count;
- std::map<std::string, int> *class_map;
+ std::map<std::string, int> class_map;
CvMat* train_sample_idx;
CvMat* test_sample_idx;
{
train_sample_part_mode = CV_COUNT;
train_sample_part.count = -1;
- class_part = 0;
mix = false;
}
{
train_sample_part_mode = CV_COUNT;
train_sample_part.count = _train_sample_count;
- class_part = 0;
mix = _mix;
}
{
train_sample_part_mode = CV_PORTION;
train_sample_part.portion = _train_sample_portion;
- class_part = 0;
mix = _mix;
}
miss_ch = '?';
//flt_separator = '.';
- class_map = new std::map<std::string, int>();
rng = &cv::theRNG();
}
// Destructor: calls clear(). The explicit `delete class_map` is removed by this
// patch because class_map changes from a heap-allocated std::map pointer to a
// by-value std::map member.
CvMLData::~CvMLData()
{
clear();
- delete class_map;
}
void CvMLData::free_train_test_idx()
void CvMLData::clear()
{
- if ( !class_map->empty() )
- class_map->clear();
+ class_map.clear();
cvReleaseMat( &values );
cvReleaseMat( &missing );
return 0;
}
// Returns the `values` member pointer as-is (no null check is performed here).
// The patch only adds the const qualifier to the method.
-const CvMat* CvMLData::get_values()
+const CvMat* CvMLData::get_values() const
{
return values;
}
// Returns the `missing` member pointer.
// The patch makes the method const and adds a check that reports
// CV_StsInternal ("data is empty") through CV_ERROR when `values` is null.
// Note that the check tests `values`, not `missing`.
// NOTE(review): whether the final return is still reached after CV_ERROR depends
// on how that macro reports errors — confirm.
-const CvMat* CvMLData::get_missing()
+const CvMat* CvMLData::get_missing() const
{
+ CV_FUNCNAME( "CvMLData::get_missing" );
+ __BEGIN__;
+
+ if ( !values )
+ CV_ERROR( CV_StsInternal, "data is empty" );
+
+ __END__;
+
return missing;
}
// New accessor added by this patch: returns a const reference to the class_map
// member, which maps a class-label string to the integer index assigned to it
// in str_to_flt_elem. The reference refers to the member itself, not a copy.
+const std::map<std::string, int>& CvMLData::get_class_labels_map() const
+{
+ return class_map;
+}
+
void CvMLData::str_to_flt_elem( const char* token, float& flt_elem, int& type)
{
{
if ( (*stopstring != 0) && (*stopstring != '\n') && (strcmp(stopstring, "\r\n") != 0) ) // class label
{
- int idx = (*class_map)[token];
+ int idx = class_map[token];
if ( idx == 0)
{
total_class_count++;
idx = total_class_count;
- (*class_map)[token] = idx;
+ class_map[token] = idx;
}
flt_elem = (float)idx;
type = CV_VAR_CATEGORICAL;
__END__;
}
// Returns the `delimiter` member. The patch only adds the const qualifier.
-char CvMLData::get_delimiter()
+char CvMLData::get_delimiter() const
{
return delimiter;
}
__END__;
}
// Returns the `miss_ch` member. The patch only adds the const qualifier.
-char CvMLData::get_miss_ch()
+char CvMLData::get_miss_ch() const
{
return miss_ch;
}
__END__;
}
// Returns the `response_idx` member.
// The patch makes the method const and adds a check that reports
// CV_StsInternal ("data is empty") through CV_ERROR when `values` is null.
// NOTE(review): whether the final return is still reached after CV_ERROR depends
// on how that macro reports errors — confirm.
-int CvMLData::get_response_idx()
+int CvMLData::get_response_idx() const
{
+ CV_FUNCNAME( "CvMLData::get_response_idx" );
+ __BEGIN__;
+
+ if ( !values )
+ CV_ERROR( CV_StsInternal, "data is empty" );
+ __END__;
return response_idx;
}
return var_types_out;
}
// Returns the byte stored at position var_idx of var_types->data.ptr.
// No null check on var_types and no range check on var_idx is performed here.
// The patch only adds the const qualifier.
-int CvMLData::get_var_type( int var_idx )
+int CvMLData::get_var_type( int var_idx ) const
{
return var_types->data.ptr[var_idx];
}
int sample_count = 0;
- if ( spl->class_part )
- CV_ERROR( CV_StsBadArg, "this division type is not supported yet" );
-
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
__END__;
}
// Returns the `train_sample_idx` member pointer.
// The patch makes the method const and adds a check that reports
// CV_StsInternal ("data is empty") through CV_ERROR when `values` is null.
// NOTE(review): whether the final return is still reached after CV_ERROR depends
// on how that macro reports errors — confirm.
-const CvMat* CvMLData::get_train_sample_idx()
+const CvMat* CvMLData::get_train_sample_idx() const
{
+ CV_FUNCNAME( "CvMLData::get_train_sample_idx" );
+ __BEGIN__;
+
+ if ( !values )
+ CV_ERROR( CV_StsInternal, "data is empty" );
+ __END__;
+
return train_sample_idx;
}
// Returns the `test_sample_idx` member pointer.
// The patch makes the method const and adds a check that reports
// CV_StsInternal ("data is empty") through CV_ERROR when `values` is null.
// NOTE(review): whether the final return is still reached after CV_ERROR depends
// on how that macro reports errors — confirm.
-const CvMat* CvMLData::get_test_sample_idx()
+const CvMat* CvMLData::get_test_sample_idx() const
{
+ CV_FUNCNAME( "CvMLData::get_test_sample_idx" );
+ __BEGIN__;
+
+ if ( !values )
+ CV_ERROR( CV_StsInternal, "data is empty" );
+ __END__;
+
return test_sample_idx;
}
void CvMLData::mix_train_and_test_idx()
{
- if ( !values || !sample_idx) return;
+ CV_FUNCNAME( "CvMLData::mix_train_and_test_idx" );
+ __BEGIN__;
+
+ if ( !values )
+ CV_ERROR( CV_StsInternal, "data is empty" );
+ __END__;
+
+ if ( !sample_idx)
+ return;
if ( train_sample_count > 0 && train_sample_count < values->rows )
{