// TODO:
// - load file in a separate thread ("prefetch")
// - can be smarter about the memcpy call instead of doing it row-by-row
//   :: use util functions caffe_copy, and Blob->offset()
//   :: don't forget to update hdf5_data_layer.cu accordingly
// - add ability to shuffle filenames if flag is set
#include <algorithm>
#include <climits>
#include <fstream>  // NOLINT(readability/streams)
#include <string>
#include <vector>

#include "caffe/layers/hdf5_data_layer.hpp"
#include "caffe/util/hdf5.hpp"
22 template <typename Dtype>
23 HDF5DataLayer<Dtype>::~HDF5DataLayer<Dtype>() { }
25 // Load data and label from HDF5 filename into the class property blobs.
26 template <typename Dtype>
27 void HDF5DataLayer<Dtype>::LoadHDF5FileData(const char* filename) {
28 DLOG(INFO) << "Loading HDF5 file: " << filename;
29 hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
31 LOG(FATAL) << "Failed opening HDF5 file: " << filename;
34 int top_size = this->layer_param_.top_size();
35 hdf_blobs_.resize(top_size);
37 const int MIN_DATA_DIM = 1;
38 const int MAX_DATA_DIM = INT_MAX;
40 for (int i = 0; i < top_size; ++i) {
41 hdf_blobs_[i] = shared_ptr<Blob<Dtype> >(new Blob<Dtype>());
42 hdf5_load_nd_dataset(file_id, this->layer_param_.top(i).c_str(),
43 MIN_DATA_DIM, MAX_DATA_DIM, hdf_blobs_[i].get());
46 herr_t status = H5Fclose(file_id);
47 CHECK_GE(status, 0) << "Failed to close HDF5 file: " << filename;
49 // MinTopBlobs==1 guarantees at least one top blob
50 CHECK_GE(hdf_blobs_[0]->num_axes(), 1) << "Input must have at least 1 axis.";
51 const int num = hdf_blobs_[0]->shape(0);
52 for (int i = 1; i < top_size; ++i) {
53 CHECK_EQ(hdf_blobs_[i]->shape(0), num);
55 // Default to identity permutation.
56 data_permutation_.clear();
57 data_permutation_.resize(hdf_blobs_[0]->shape(0));
58 for (int i = 0; i < hdf_blobs_[0]->shape(0); i++)
59 data_permutation_[i] = i;
62 if (this->layer_param_.hdf5_data_param().shuffle()) {
63 std::random_shuffle(data_permutation_.begin(), data_permutation_.end());
64 DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0)
65 << " rows (shuffled)";
67 DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0) << " rows";
71 template <typename Dtype>
72 void HDF5DataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
73 const vector<Blob<Dtype>*>& top) {
74 // Refuse transformation parameters since HDF5 is totally generic.
75 CHECK(!this->layer_param_.has_transform_param()) <<
76 this->type() << " does not transform data.";
77 // Read the source to parse the filenames.
78 const string& source = this->layer_param_.hdf5_data_param().source();
79 LOG(INFO) << "Loading list of HDF5 filenames from: " << source;
80 hdf_filenames_.clear();
81 std::ifstream source_file(source.c_str());
82 if (source_file.is_open()) {
84 while (source_file >> line) {
85 hdf_filenames_.push_back(line);
88 LOG(FATAL) << "Failed to open source file: " << source;
91 num_files_ = hdf_filenames_.size();
93 LOG(INFO) << "Number of HDF5 files: " << num_files_;
94 CHECK_GE(num_files_, 1) << "Must have at least 1 HDF5 filename listed in "
97 file_permutation_.clear();
98 file_permutation_.resize(num_files_);
99 // Default to identity permutation.
100 for (int i = 0; i < num_files_; i++) {
101 file_permutation_[i] = i;
104 // Shuffle if needed.
105 if (this->layer_param_.hdf5_data_param().shuffle()) {
106 std::random_shuffle(file_permutation_.begin(), file_permutation_.end());
109 // Load the first HDF5 file and initialize the line counter.
110 LoadHDF5FileData(hdf_filenames_[file_permutation_[current_file_]].c_str());
114 const int batch_size = this->layer_param_.hdf5_data_param().batch_size();
115 const int top_size = this->layer_param_.top_size();
116 vector<int> top_shape;
117 for (int i = 0; i < top_size; ++i) {
118 top_shape.resize(hdf_blobs_[i]->num_axes());
119 top_shape[0] = batch_size;
120 for (int j = 1; j < top_shape.size(); ++j) {
121 top_shape[j] = hdf_blobs_[i]->shape(j);
123 top[i]->Reshape(top_shape);
127 template <typename Dtype>
128 void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
129 const vector<Blob<Dtype>*>& top) {
130 const int batch_size = this->layer_param_.hdf5_data_param().batch_size();
131 for (int i = 0; i < batch_size; ++i, ++current_row_) {
132 if (current_row_ == hdf_blobs_[0]->shape(0)) {
133 if (num_files_ > 1) {
135 if (current_file_ == num_files_) {
137 if (this->layer_param_.hdf5_data_param().shuffle()) {
138 std::random_shuffle(file_permutation_.begin(),
139 file_permutation_.end());
141 DLOG(INFO) << "Looping around to first file.";
144 hdf_filenames_[file_permutation_[current_file_]].c_str());
147 if (this->layer_param_.hdf5_data_param().shuffle())
148 std::random_shuffle(data_permutation_.begin(), data_permutation_.end());
150 for (int j = 0; j < this->layer_param_.top_size(); ++j) {
151 int data_dim = top[j]->count() / top[j]->shape(0);
153 &hdf_blobs_[j]->cpu_data()[data_permutation_[current_row_]
154 * data_dim], &top[j]->mutable_cpu_data()[i * data_dim]);
160 STUB_GPU_FORWARD(HDF5DataLayer, Forward);
163 INSTANTIATE_CLASS(HDF5DataLayer);
164 REGISTER_LAYER_CLASS(HDF5Data);