#include "hdf5_hl.h"
#include "caffe/proto/caffe.pb.h"
-#include "boost/scoped_ptr.hpp"
#include "caffe/blob.hpp"
using std::string;
}
template <typename Dtype>
+void hdf5_load_nd_dataset_helper(
+ hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
+ Blob<Dtype>& blob);
+
+template <typename Dtype>
void hdf5_load_nd_dataset(
- hid_t file_id, const char* dataset_name_,
- int min_dim,//inclusive
- int max_dim,//inclusive
- //output:
- boost::scoped_ptr<Dtype>* array,
- std::vector<hsize_t>& dims
- );
+ hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
+ Blob<Dtype>& blob);
} // namespace caffe
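// A minimal usage sketch of the new Blob-based loader, assuming a file
// "sample.h5" containing a float dataset named "data" (both hypothetical):
//
//   hid_t file_id = H5Fopen("sample.h5", H5F_ACC_RDONLY, H5P_DEFAULT);
//   CHECK_GE(file_id, 0) << "Failed opening sample.h5";
//   caffe::Blob<float> blob;
//   caffe::hdf5_load_nd_dataset(file_id, "data", 2, 4, blob);
//   LOG(INFO) << "Loaded " << blob.num() << " rows";
//   H5Fclose(file_id);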
vector<Blob<Dtype>*>* top);
protected:
- virtual void load_hdf5_file(const char* filename);
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+ virtual void load_hdf5_file_data(const char* filename);
+
std::vector<std::string> hdf_filenames_;
unsigned int num_files_;
unsigned int current_file_;
hsize_t current_row_;
- boost::scoped_ptr<Dtype> data_;
- boost::scoped_ptr<Dtype> label_;
- std::vector<hsize_t> data_dims_;
- std::vector<hsize_t> label_dims_;
+ Blob<Dtype> data_blob_;
+ Blob<Dtype> label_blob_;
};
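// A quick sketch of what the switch to Blob members buys (shapes here are
// hypothetical): one object now carries both the shape and the storage that
// the scoped_ptr arrays and hsize_t dims vectors used to track separately.
//
//   Blob<float> b;
//   b.Reshape(100, 3, 28, 28);             // num, channels, height, width
//   CHECK_EQ(b.count(), 100 * 3 * 28 * 28);
//   float* data = b.mutable_cpu_data();    // storage allocated on first use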
template <typename Dtype>
HDF5DataLayer<Dtype>::~HDF5DataLayer<Dtype>() { }
+// Load data and label from an HDF5 file into the member blobs.
template <typename Dtype>
-void HDF5DataLayer<Dtype>::load_hdf5_file(const char* filename) {
+void HDF5DataLayer<Dtype>::load_hdf5_file_data(const char* filename) {
LOG(INFO) << "Loading HDF5 file" << filename;
hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
if (file_id < 0) {
LOG(ERROR) << "Failed opening HDF5 file" << filename;
return;
}
+
const int MIN_DATA_DIM = 2;
const int MAX_DATA_DIM = 4;
+ hdf5_load_nd_dataset(
+ file_id, "data", MIN_DATA_DIM, MAX_DATA_DIM, data_blob_);
+
const int MIN_LABEL_DIM = 1;
const int MAX_LABEL_DIM = 2;
- hdf5_load_nd_dataset(file_id, "data", MIN_DATA_DIM, MAX_DATA_DIM,
- &data_, data_dims_);
- hdf5_load_nd_dataset(file_id, "label", MIN_LABEL_DIM, MAX_LABEL_DIM,
- &label_, label_dims_);
-
- // Add missing dimensions.
- const int MAX_BLOB_DIM = 4;
- while(data_dims_.size() < MAX_BLOB_DIM) {
- data_dims_.push_back(1);
- }
- while(label_dims_.size() < MAX_BLOB_DIM) {
- label_dims_.push_back(1);
- }
+ hdf5_load_nd_dataset(
+ file_id, "label", MIN_LABEL_DIM, MAX_LABEL_DIM, label_blob_);
herr_t status = H5Fclose(file_id);
- CHECK_EQ(data_dims_[0], label_dims_[0]);
- LOG(INFO) << "Successully loaded " << data_dims_[0] << " rows";
+ CHECK_EQ(data_blob_.num(), label_blob_.num());
+ LOG(INFO) << "Successully loaded " << data_blob_.num() << " rows";
}
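// A sketch of producing a file this loader accepts, via the HDF5 Lite API
// (file name and shapes are hypothetical):
//
//   const hsize_t data_dims[4] = {100, 3, 28, 28};
//   const hsize_t label_dims[2] = {100, 1};
//   std::vector<float> data(100 * 3 * 28 * 28), label(100);
//   hid_t f = H5Fcreate("sample.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
//   H5LTmake_dataset_float(f, "data", 4, data_dims, data.data());
//   H5LTmake_dataset_float(f, "label", 2, label_dims, label.data());
//   H5Fclose(f);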
template <typename Dtype>
LOG(INFO) << "Number of files: " << num_files_;
// Load the first HDF5 file and initialize the line counter.
- load_hdf5_file(hdf_filenames_[current_file_].c_str());
+ load_hdf5_file_data(hdf_filenames_[current_file_].c_str());
current_row_ = 0;
// Reshape blobs.
- (*top)[0]->Reshape(this->layer_param_.batchsize(),
- data_dims_[1], data_dims_[2], data_dims_[3]);
- (*top)[1]->Reshape(this->layer_param_.batchsize(),
- label_dims_[1], label_dims_[2], label_dims_[3]);
+  // Blob::Reshape expects (num, channels, height, width).
+  (*top)[0]->Reshape(this->layer_param_.batchsize(), data_blob_.channels(),
+                     data_blob_.height(), data_blob_.width());
+  (*top)[1]->Reshape(this->layer_param_.batchsize(), label_blob_.channels(),
+                     label_blob_.height(), label_blob_.width());
LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
<< (*top)[0]->channels() << "," << (*top)[0]->height() << ","
<< (*top)[0]->width();
const int label_data_count = (*top)[1]->count() / (*top)[1]->num();
  for (int i = 0; i < batchsize; ++i, ++current_row_) {
-    if (current_row_ == data_dims_[0]) {
+    if (current_row_ == data_blob_.num()) {
      if (num_files_ > 1) {
        current_file_ += 1;
        // Wrap back around to the first file once every file has been read.
        if (current_file_ == num_files_) {
          current_file_ = 0;
          LOG(INFO) << "looping around to first file";
        }
-        load_hdf5_file(hdf_filenames_[current_file_].c_str());
+        load_hdf5_file_data(hdf_filenames_[current_file_].c_str());
      }
      current_row_ = 0;
    }
memcpy(&(*top)[0]->mutable_cpu_data()[i * data_count],
- &(data_.get()[current_row_ * data_count]),
+           &data_blob_.cpu_data()[current_row_ * data_count],
sizeof(Dtype) * data_count);
memcpy(&(*top)[1]->mutable_cpu_data()[i * label_data_count],
- &(label_.get()[current_row_ * label_data_count]),
+           &label_blob_.cpu_data()[current_row_ * label_data_count],
sizeof(Dtype) * label_data_count);
}
}
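// Worked example of the row arithmetic above, with a hypothetical data blob
// of shape (100, 3, 28, 28): data_count = count() / num() = 235200 / 100
// = 2352, so row current_row_ begins at offset current_row_ * 2352 and each
// memcpy moves 2352 * sizeof(Dtype) bytes into batch slot i.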
const int label_data_count = (*top)[1]->count() / (*top)[1]->num();
  for (int i = 0; i < batchsize; ++i, ++current_row_) {
-    if (current_row_ == data_dims_[0]) {
+    if (current_row_ == data_blob_.num()) {
      if (num_files_ > 1) {
        current_file_ += 1;
        // Wrap back around to the first file once every file has been read.
        if (current_file_ == num_files_) {
          current_file_ = 0;
          LOG(INFO) << "looping around to first file";
        }
-        load_hdf5_file(hdf_filenames_[current_file_].c_str());
+        load_hdf5_file_data(hdf_filenames_[current_file_].c_str());
      }
      current_row_ = 0;
    }
CUDA_CHECK(cudaMemcpy(
&(*top)[0]->mutable_gpu_data()[i * data_count],
- &(data_.get()[current_row_ * data_count]),
+      &data_blob_.cpu_data()[current_row_ * data_count],
sizeof(Dtype) * data_count,
cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(
&(*top)[1]->mutable_gpu_data()[i * label_data_count],
- &(label_.get()[current_row_ * label_data_count]),
+      &label_blob_.cpu_data()[current_row_ * label_data_count],
sizeof(Dtype) * label_data_count,
cudaMemcpyHostToDevice));
}
return true;
}
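// Note on the design: data_blob_ and label_blob_ hold host memory only, so
// each row above crosses to the device with its own cudaMemcpy; copying a
// whole batch per call would amortize the per-transfer overhead.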
-// Verifies format of data stored in the HDF5 file.
-// Returns the total size of the data in the file.
-int hdf5_verify_and_get_size_of_nd_dataset(
- hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
- std::vector<hsize_t>& out_dims) {
+// Verifies format of data stored in HDF5 file and reshapes blob accordingly.
+template <typename Dtype>
+void hdf5_load_nd_dataset_helper(
+    hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
+    Blob<Dtype>& blob) {
// Verify that the number of dimensions is in the accepted range.
  herr_t status;
  int ndims;
  status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
  CHECK_GE(ndims, min_dim);
  CHECK_LE(ndims, max_dim);
// Verify that the data format is what we expect: float or double.
- boost::scoped_ptr<hsize_t> dims(new hsize_t[ndims]);
+ std::vector<hsize_t> dims(ndims);
H5T_class_t class_;
status = H5LTget_dataset_info(
- file_id, dataset_name_, dims.get(), &class_, NULL);
+ file_id, dataset_name_, dims.data(), &class_, NULL);
CHECK_EQ(class_, H5T_FLOAT) << "Expected float or double data";
- // Establish and return the total data size.
- int array_size = 1;
- for (int i=0; i<ndims; ++i) {
- out_dims.push_back(dims.get()[i]);
- array_size *= dims.get()[i];
- }
- return array_size;
+ blob.Reshape(
+ dims[0],
+ (dims.size() > 1) ? dims[1] : 1,
+ (dims.size() > 2) ? dims[2] : 1,
+ (dims.size() > 3) ? dims[3] : 1
+ );
}
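// For example (hypothetical extents): a 2-D dataset of extent 100 x 20 is
// reshaped to a (100, 20, 1, 1) blob, while a 4-D dataset of extent
// 100 x 3 x 28 x 28 maps one-to-one onto (num, channels, height, width).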
-// Read float data from HDF5 file into array, storing dimensions in out_dims.
template <>
void hdf5_load_nd_dataset<float>(hid_t file_id, const char* dataset_name_,
- int min_dim, int max_dim,
- boost::scoped_ptr<float>* array, std::vector<hsize_t>& out_dims) {
- int array_size = hdf5_verify_and_get_size_of_nd_dataset(
- file_id, dataset_name_, min_dim, max_dim, out_dims);
- array->reset(new float[array_size]);
+ int min_dim, int max_dim, Blob<float>& blob) {
+ hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob);
herr_t status = H5LTread_dataset_float(
- file_id, dataset_name_, array->get());
+ file_id, dataset_name_, blob.mutable_cpu_data());
}
-// Read double data from HDF5 file into array, storing dimensions in out_dims.
template <>
void hdf5_load_nd_dataset<double>(hid_t file_id, const char* dataset_name_,
- int min_dim, int max_dim,
- boost::scoped_ptr<double>* array, std::vector<hsize_t>& out_dims) {
- int array_size = hdf5_verify_and_get_size_of_nd_dataset(
- file_id, dataset_name_, min_dim, max_dim, out_dims);
- array->reset(new double[array_size]);
+ int min_dim, int max_dim, Blob<double>& blob) {
+ hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob);
herr_t status = H5LTread_dataset_double(
- file_id, dataset_name_, array->get());
+ file_id, dataset_name_, blob.mutable_cpu_data());
}
} // namespace caffe