-// Copyright Sergey Karayev 2014
/*
+Contributors:
+- Sergey Karayev, 2014.
+- Tobias Domhan, 2014.
+
TODO:
- only load parts of the file, in accordance with a prototxt param "max_mem"
*/
-
#include <stdint.h>
#include <string>
#include <vector>
HDF5DataLayer<Dtype>::~HDF5DataLayer<Dtype>() { }
template <typename Dtype>
-void HDF5DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
- CHECK_EQ(bottom.size(), 0) << "HDF5DataLayer takes no input blobs.";
- CHECK_EQ(top->size(), 2) << "HDF5DataLayer takes two blobs as output.";
-
- // Load the HDF5 file and initialize the counter.
- const char* hdf_filename = this->layer_param_.source().c_str();
- LOG(INFO) << "Loading HDF5 file" << hdf_filename;
- hid_t file_id = H5Fopen(hdf_filename, H5F_ACC_RDONLY, H5P_DEFAULT);
+void HDF5DataLayer<Dtype>::load_hdf5_file(const char* filename) {  // Opens filename read-only and loads its "data" and "label" datasets into data_ / label_.
+ LOG(INFO) << "Loading HDF5 file" << filename;
+ hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);  // A negative id is treated as an open failure below.
if (file_id < 0) {
- LOG(ERROR) << "Failed opening HDF5 file" << hdf_filename;
+ LOG(ERROR) << "Failed opening HDF5 file" << filename;  // Only logged; the function then returns without loading anything.
return;
}
+ const int MIN_DATA_DIM = 2;  // Lower bound on the number of dimensions accepted for "data".
const int MAX_DATA_DIM = 4;
+ const int MIN_LABEL_DIM = 1;  // Lower bound on the number of dimensions accepted for "label".
const int MAX_LABEL_DIM = 2;
- const int MIN_DIM = 2;
- hd5_load_nd_dataset(file_id, "data", MIN_DIM, MAX_DATA_DIM,
+ hdf5_load_nd_dataset(file_id, "data", MIN_DATA_DIM, MAX_DATA_DIM,
&data_, data_dims_);
- hd5_load_nd_dataset(file_id, "label", MIN_DIM, MAX_LABEL_DIM,
+ hdf5_load_nd_dataset(file_id, "label", MIN_LABEL_DIM, MAX_LABEL_DIM,
&label_, label_dims_);
- while(data_dims_.size() < MAX_DATA_DIM) {
+ // Add missing dimensions: both dims vectors are padded with trailing 1s up to MAX_BLOB_DIM entries.
+ const int MAX_BLOB_DIM = 4;
+ while(data_dims_.size() < MAX_BLOB_DIM) {
data_dims_.push_back(1);
}
-
- //add missing dimensions:
- label_dims_.push_back(1);
- label_dims_.push_back(1);
+ while(label_dims_.size() < MAX_BLOB_DIM) {
+ label_dims_.push_back(1);
+ }
herr_t status = H5Fclose(file_id);
CHECK_EQ(data_dims_[0], label_dims_[0]);
LOG(INFO) << "Successully loaded " << data_dims_[0] << " rows";
+}
+
+template <typename Dtype>
+void HDF5DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {  // Checks the blob counts, loads the HDF5 source file, then resets the row counter.
+ CHECK_EQ(bottom.size(), 0) << "HDF5DataLayer takes no input blobs.";
+ CHECK_EQ(top->size(), 2) << "HDF5DataLayer takes two blobs as output.";
+
+ // Load the HDF5 file and initialize the counter.
+ const char* hdf_filename = this->layer_param_.source().c_str();  // The path is read from the layer parameter's source() field.
+ load_hdf5_file(hdf_filename);
current_row_ = 0;
// Reshape blobs.
return true;
}
+// Verifies format of data stored in the HDF5 file.
+// Checks that dataset `dataset_name_` has between min_dim and max_dim
+// dimensions (inclusive) and that its class is H5T_FLOAT, then appends each
+// dimension extent to out_dims (entries already in out_dims are kept).
+// Returns the total size of the data in the file, i.e. the product of the
+// dimension extents. Every failed check aborts through CHECK.
+// NOTE(review): the product is accumulated in an int with no overflow check.
+int hdf5_verify_and_get_size_of_nd_dataset(
+    hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
+    std::vector<hsize_t>& out_dims) {
+  // Verify that the number of dimensions is in the accepted range.
+  // ndims starts at 0 so it is never read uninitialized, and the query
+  // status is checked before ndims is trusted.
+  int ndims = 0;
+  herr_t status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
+  CHECK_GE(status, 0) << "Failed to get dataset ndims for " << dataset_name_;
+  CHECK_GE(ndims, min_dim);
+  CHECK_LE(ndims, max_dim);
+
+  // Verify that the data format is what we expect: float or double.
+  // The extents are held in a std::vector rather than a
+  // boost::scoped_ptr<hsize_t>, which would free a new[] buffer with
+  // scalar delete.
+  std::vector<hsize_t> dims(ndims);
+  H5T_class_t class_;
+  status = H5LTget_dataset_info(
+      file_id, dataset_name_, dims.empty() ? NULL : &dims[0], &class_, NULL);
+  CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_;
+  CHECK_EQ(class_, H5T_FLOAT) << "Expected float or double data";
+
+  // Establish and return the total data size.
+  int array_size = 1;
+  for (size_t i = 0; i < dims.size(); ++i) {
+    out_dims.push_back(dims[i]);
+    array_size *= dims[i];
+  }
+  return array_size;
+}
+
+// Read float data from HDF5 file into array, storing dimensions in out_dims.
+// The dataset is first validated by hdf5_verify_and_get_size_of_nd_dataset
+// (number of dimensions within [min_dim, max_dim], class H5T_FLOAT), which
+// also appends the dimension extents to out_dims. A buffer of that many
+// floats is then allocated into *array and filled from the file. A failed
+// read aborts through CHECK instead of leaving the buffer uninitialized.
+// NOTE(review): *array is a boost::scoped_ptr that is handed a new[] buffer;
+// scoped_ptr frees with scalar delete, so boost::scoped_array would be the
+// matching owner. That type is part of the signature and is not changed here.
template <>
-void hd5_load_nd_dataset<float>(hid_t file_id, const char* dataset_name_,
+void hdf5_load_nd_dataset<float>(hid_t file_id, const char* dataset_name_,
int min_dim, int max_dim,
boost::scoped_ptr<float>* array, std::vector<hsize_t>& out_dims) {
- herr_t status;
-
- int ndims;
- status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
- CHECK_GE(ndims, min_dim);
- CHECK_LE(ndims, max_dim);
-
- boost::scoped_ptr<hsize_t> dims(new hsize_t[ndims]);
-
- H5T_class_t class_;
- status = H5LTget_dataset_info(
- file_id, dataset_name_, dims.get(), &class_, NULL);
- CHECK_EQ(class_, H5T_FLOAT) << "Epected float data";
-
- int array_size = 1;
- for (int i=0; i<ndims; ++i) {
- out_dims.push_back(dims.get()[i]);
- array_size *= dims.get()[i];
- }
-
- array->reset(new float[array_size]);
- status = H5LTread_dataset_float(
- file_id, dataset_name_, array->get());
+  const int array_size = hdf5_verify_and_get_size_of_nd_dataset(
+      file_id, dataset_name_, min_dim, max_dim, out_dims);
+  array->reset(new float[array_size]);
+  const herr_t status = H5LTread_dataset_float(
+      file_id, dataset_name_, array->get());
+  // H5LT calls report failure with a negative status.
+  CHECK_GE(status, 0) << "Failed to read float dataset " << dataset_name_;
}
+// Read double data from HDF5 file into array, storing dimensions in out_dims.
+// The dataset is first validated by hdf5_verify_and_get_size_of_nd_dataset
+// (number of dimensions within [min_dim, max_dim], class H5T_FLOAT), which
+// also appends the dimension extents to out_dims. A buffer of that many
+// doubles is then allocated into *array and filled from the file. A failed
+// read aborts through CHECK instead of leaving the buffer uninitialized.
+// NOTE(review): *array is a boost::scoped_ptr that is handed a new[] buffer;
+// scoped_ptr frees with scalar delete, so boost::scoped_array would be the
+// matching owner. That type is part of the signature and is not changed here.
template <>
-void hd5_load_nd_dataset<double>(hid_t file_id, const char* dataset_name_,
+void hdf5_load_nd_dataset<double>(hid_t file_id, const char* dataset_name_,
int min_dim, int max_dim,
boost::scoped_ptr<double>* array, std::vector<hsize_t>& out_dims) {
- herr_t status;
-
- int ndims;
- status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
- CHECK_GE(ndims, min_dim);
- CHECK_LE(ndims, max_dim);
-
- boost::scoped_ptr<hsize_t> dims(new hsize_t[ndims]);
-
- H5T_class_t class_;
- status = H5LTget_dataset_info(
- file_id, dataset_name_, dims.get(), &class_, NULL);
- CHECK_EQ(class_, H5T_FLOAT) << "Epected float data";
-
- int array_size = 1;
- for (int i=0; i<ndims; ++i) {
- out_dims.push_back(dims.get()[i]);
- array_size *= dims.get()[i];
- }
-
- array->reset(new double[array_size]);
- status = H5LTread_dataset_double(
- file_id, dataset_name_, array->get());
+  const int array_size = hdf5_verify_and_get_size_of_nd_dataset(
+      file_id, dataset_name_, min_dim, max_dim, out_dims);
+  array->reset(new double[array_size]);
+  const herr_t status = H5LTread_dataset_double(
+      file_id, dataset_name_, array->get());
+  // H5LT calls report failure with a negative status.
+  CHECK_GE(status, 0) << "Failed to read double dataset " << dataset_name_;
}
} // namespace caffe