From 5669f61927ed8131f223e1b4db823a00eb74896a Mon Sep 17 00:00:00 2001
From: Sergey Karayev
Date: Mon, 17 Mar 2014 12:19:44 -0700
Subject: [PATCH] HDF5 data now loaded into Blobs; cleaner interface

---
 include/caffe/util/io.hpp            | 15 ++++++------
 include/caffe/vision_layers.hpp      |  9 ++++---
 src/caffe/layers/hdf5_data_layer.cpp | 44 +++++++++++++++-------------------
 src/caffe/layers/hdf5_data_layer.cu  |  8 +++----
 src/caffe/util/io.cpp                | 46 ++++++++++++++----------------------
 5 files changed, 52 insertions(+), 70 deletions(-)

diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp
index f04dd1d..1dcdb7e 100644
--- a/include/caffe/util/io.hpp
+++ b/include/caffe/util/io.hpp
@@ -10,7 +10,6 @@
 #include "hdf5_hl.h"
 
 #include "caffe/proto/caffe.pb.h"
-#include "boost/scoped_ptr.hpp"
 #include "caffe/blob.hpp"
 
 using std::string;
@@ -52,14 +51,14 @@ inline bool ReadImageToDatum(const string& filename, const int label,
 }
 
 template <typename Dtype>
+void hdf5_load_nd_dataset_helper(
+    hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
+    Blob<Dtype>& blob);
+
+template <typename Dtype>
 void hdf5_load_nd_dataset(
-  hid_t file_id, const char* dataset_name_,
-  int min_dim,//inclusive
-  int max_dim,//inclusive
-  //output:
-  boost::scoped_ptr<Dtype>* array,
-  std::vector<hsize_t>& dims
-  );
+    hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
+    Blob<Dtype>& blob);
 
 }  // namespace caffe
 
diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index 4b14220..8ab1afb 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -389,7 +389,6 @@ class HDF5DataLayer : public Layer<Dtype> {
       vector<Blob<Dtype>*>* top);
 
  protected:
-  virtual void load_hdf5_file(const char* filename);
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       vector<Blob<Dtype>*>* top);
 
@@ -400,15 +399,15 @@
 
   virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
       const bool propagate_down, vector<Blob<Dtype>*>* bottom);
 
+  virtual void load_hdf5_file_data(const char* filename);
+
   std::vector<std::string> hdf_filenames_;
   unsigned int num_files_;
   unsigned int current_file_;
   hsize_t current_row_;
 
-  boost::scoped_ptr<Dtype> data_;
-  boost::scoped_ptr<Dtype> label_;
-  std::vector<hsize_t> data_dims_;
-  std::vector<hsize_t> label_dims_;
+  Blob<Dtype> data_blob_;
+  Blob<Dtype> label_blob_;
 };
 
diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp
index 7f993a6..5b568a8 100644
--- a/src/caffe/layers/hdf5_data_layer.cpp
+++ b/src/caffe/layers/hdf5_data_layer.cpp
@@ -27,35 +27,29 @@ namespace caffe {
 template <typename Dtype>
 HDF5DataLayer<Dtype>::~HDF5DataLayer<Dtype>() { }
 
+// Load data and label from HDF5 filename into the class property blobs.
 template <typename Dtype>
-void HDF5DataLayer<Dtype>::load_hdf5_file(const char* filename) {
+void HDF5DataLayer<Dtype>::load_hdf5_file_data(const char* filename) {
   LOG(INFO) << "Loading HDF5 file" << filename;
   hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
   if (file_id < 0) {
     LOG(ERROR) << "Failed opening HDF5 file" << filename;
     return;
   }
+
   const int MIN_DATA_DIM = 2;
   const int MAX_DATA_DIM = 4;
+  hdf5_load_nd_dataset(
+    file_id, "data", MIN_DATA_DIM, MAX_DATA_DIM, data_blob_);
+
   const int MIN_LABEL_DIM = 1;
   const int MAX_LABEL_DIM = 2;
-  hdf5_load_nd_dataset(file_id, "data", MIN_DATA_DIM, MAX_DATA_DIM,
-                       &data_, data_dims_);
-  hdf5_load_nd_dataset(file_id, "label", MIN_LABEL_DIM, MAX_LABEL_DIM,
-                       &label_, label_dims_);
-
-  // Add missing dimensions.
-  const int MAX_BLOB_DIM = 4;
-  while(data_dims_.size() < MAX_BLOB_DIM) {
-    data_dims_.push_back(1);
-  }
-  while(label_dims_.size() < MAX_BLOB_DIM) {
-    label_dims_.push_back(1);
-  }
+  hdf5_load_nd_dataset(
+    file_id, "label", MIN_LABEL_DIM, MAX_LABEL_DIM, label_blob_);
 
   herr_t status = H5Fclose(file_id);
-  CHECK_EQ(data_dims_[0], label_dims_[0]);
-  LOG(INFO) << "Successully loaded " << data_dims_[0] << " rows";
+  CHECK_EQ(data_blob_.num(), label_blob_.num());
+  LOG(INFO) << "Successully loaded " << data_blob_.num() << " rows";
 }
 
 template <typename Dtype>
@@ -80,14 +74,14 @@ void HDF5DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
   LOG(INFO) << "Number of files: " << num_files_;
 
   // Load the first HDF5 file and initialize the line counter.
-  load_hdf5_file(hdf_filenames_[current_file_].c_str());
+  load_hdf5_file_data(hdf_filenames_[current_file_].c_str());
   current_row_ = 0;
 
   // Reshape blobs.
-  (*top)[0]->Reshape(this->layer_param_.batchsize(),
-      data_dims_[1], data_dims_[2], data_dims_[3]);
-  (*top)[1]->Reshape(this->layer_param_.batchsize(),
-      label_dims_[1], label_dims_[2], label_dims_[3]);
+  (*top)[0]->Reshape(this->layer_param_.batchsize(), data_blob_.channels(),
+                     data_blob_.width(), data_blob_.height());
+  (*top)[1]->Reshape(this->layer_param_.batchsize(), label_blob_.channels(),
+                     label_blob_.width(), label_blob_.height());
   LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
       << (*top)[0]->channels() << "," << (*top)[0]->height() << ","
       << (*top)[0]->width();
@@ -101,7 +95,7 @@ void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
   const int label_data_count = (*top)[1]->count() / (*top)[1]->num();
 
   for (int i = 0; i < batchsize; ++i, ++current_row_) {
-    if (current_row_ == data_dims_[0]) {
+    if (current_row_ == data_blob_.num()) {
       if (num_files_ > 1) {
         current_file_ += 1;
 
@@ -110,17 +104,17 @@ void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
           LOG(INFO) << "looping around to first file";
         }
 
-        load_hdf5_file(hdf_filenames_[current_file_].c_str());
+        load_hdf5_file_data(hdf_filenames_[current_file_].c_str());
       }
       current_row_ = 0;
     }
 
     memcpy(&(*top)[0]->mutable_cpu_data()[i * data_count],
-        &(data_.get()[current_row_ * data_count]),
+        &data_blob_.mutable_cpu_data()[current_row_ * data_count],
         sizeof(Dtype) * data_count);
 
     memcpy(&(*top)[1]->mutable_cpu_data()[i * label_data_count],
-        &(label_.get()[current_row_ * label_data_count]),
+        &label_blob_.mutable_cpu_data()[current_row_ * label_data_count],
         sizeof(Dtype) * label_data_count);
   }
 }
 
diff --git a/src/caffe/layers/hdf5_data_layer.cu b/src/caffe/layers/hdf5_data_layer.cu
index 7a31a60..f1a6434 100644
--- a/src/caffe/layers/hdf5_data_layer.cu
+++ b/src/caffe/layers/hdf5_data_layer.cu
@@ -27,7 +27,7 @@ void HDF5DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
   const int label_data_count = (*top)[1]->count() / (*top)[1]->num();
 
   for (int i = 0; i < batchsize; ++i, ++current_row_) {
-    if (current_row_ == data_dims_[0]) {
+    if (current_row_ == data_blob_.num()) {
       if (num_files_ > 1) {
         current_file_ += 1;
 
@@ -36,20 +36,20 @@ void HDF5DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
           LOG(INFO) << "looping around to first file";
         }
 
-        load_hdf5_file(hdf_filenames_[current_file_].c_str());
+        load_hdf5_file_data(hdf_filenames_[current_file_].c_str());
       }
       current_row_ = 0;
     }
 
     CUDA_CHECK(cudaMemcpy(
         &(*top)[0]->mutable_gpu_data()[i * data_count],
-        &(data_.get()[current_row_ * data_count]),
+        &data_blob_.mutable_cpu_data()[current_row_ * data_count],
         sizeof(Dtype) * data_count, cudaMemcpyHostToDevice));
 
     CUDA_CHECK(cudaMemcpy(
         &(*top)[1]->mutable_gpu_data()[i * label_data_count],
-        &(label_.get()[current_row_ * label_data_count]),
+        &label_blob_.mutable_cpu_data()[current_row_ * label_data_count],
         sizeof(Dtype) * label_data_count, cudaMemcpyHostToDevice));
   }
 }
 
diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp
index 0f987c9..72ceb8d 100644
--- a/src/caffe/util/io.cpp
+++ b/src/caffe/util/io.cpp
@@ -100,11 +100,10 @@ bool ReadImageToDatum(const string& filename, const int label,
   return true;
 }
 
-// Verifies format of data stored in the HDF5 file.
-// Returns the total size of the data in the file.
-int hdf5_verify_and_get_size_of_nd_dataset(
-    hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
-    std::vector<hsize_t>& out_dims) {
+// Verifies format of data stored in HDF5 file and reshapes blob accordingly.
+template <typename Dtype>
+void hdf5_load_nd_dataset_helper(
+    hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, Blob<Dtype>& blob) {
   // Verify that the number of dimensions is in the accepted range.
   herr_t status;
   int ndims;
@@ -113,43 +112,34 @@ int hdf5_verify_and_get_size_of_nd_dataset(
   CHECK_LE(ndims, max_dim);
 
   // Verify that the data format is what we expect: float or double.
-  boost::scoped_ptr<hsize_t> dims(new hsize_t[ndims]);
+  std::vector<hsize_t> dims(ndims);
   H5T_class_t class_;
   status = H5LTget_dataset_info(
-    file_id, dataset_name_, dims.get(), &class_, NULL);
+    file_id, dataset_name_, dims.data(), &class_, NULL);
   CHECK_EQ(class_, H5T_FLOAT) << "Expected float or double data";
 
-  // Establish and return the total data size.
-  int array_size = 1;
-  for (int i=0; i<ndims; ++i) {
-    out_dims.push_back(dims.get()[i]);
-    array_size *= dims.get()[i];
-  }
-  return array_size;
+  blob.Reshape(
+    dims[0],
+    (dims.size() > 1) ? dims[1] : 1,
+    (dims.size() > 2) ? dims[2] : 1,
+    (dims.size() > 3) ? dims[3] : 1
+  );
 }
 
-// Read float data from HDF5 file into array, storing dimensions in out_dims.
 template <>
 void hdf5_load_nd_dataset<float>(hid_t file_id, const char* dataset_name_,
-    int min_dim, int max_dim,
-    boost::scoped_ptr<float>* array, std::vector<hsize_t>& out_dims) {
-  int array_size = hdf5_verify_and_get_size_of_nd_dataset(
-    file_id, dataset_name_, min_dim, max_dim, out_dims);
-  array->reset(new float[array_size]);
+    int min_dim, int max_dim, Blob<float>& blob) {
+  hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob);
   herr_t status = H5LTread_dataset_float(
-    file_id, dataset_name_, array->get());
+    file_id, dataset_name_, blob.mutable_cpu_data());
 }
 
-// Read double data from HDF5 file into array, storing dimensions in out_dims.
 template <>
 void hdf5_load_nd_dataset<double>(hid_t file_id, const char* dataset_name_,
-    int min_dim, int max_dim,
-    boost::scoped_ptr<double>* array, std::vector<hsize_t>& out_dims) {
-  int array_size = hdf5_verify_and_get_size_of_nd_dataset(
-    file_id, dataset_name_, min_dim, max_dim, out_dims);
-  array->reset(new double[array_size]);
+    int min_dim, int max_dim, Blob<double>& blob) {
+  hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob);
   herr_t status = H5LTread_dataset_double(
-    file_id, dataset_name_, array->get());
+    file_id, dataset_name_, blob.mutable_cpu_data());
 }
 
 }  // namespace caffe
-- 
2.7.4
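
Note on the expected file layout (an annotation, not part of the patch): the loader above wants an HDF5 file holding float or double datasets named "data" (2 to 4 dimensions, per MIN_DATA_DIM/MAX_DATA_DIM) and "label" (1 to 2 dimensions), with matching first dimensions. A minimal sketch of writing such a file with the same hdf5_hl high-level API the patch links against; the file name, shapes, and fill values here are hypothetical:

#include <vector>

#include "hdf5.h"
#include "hdf5_hl.h"

int main() {
  const hsize_t num = 10, channels = 3, height = 2, width = 2;
  std::vector<float> data(num * channels * height * width, 0.5f);
  std::vector<float> label(num, 1.0f);  // one label per row of data

  hid_t file_id = H5Fcreate("sample.h5", H5F_ACC_TRUNC, H5P_DEFAULT,
                            H5P_DEFAULT);
  if (file_id < 0) return 1;

  // "data" is 4-D here; 2-D or 3-D also passes the loader's dim checks.
  hsize_t data_dims[4] = {num, channels, height, width};
  H5LTmake_dataset_float(file_id, "data", 4, data_dims, data.data());

  // "label" is 1-D; its dim 0 must equal data's dim 0, since the loader
  // runs CHECK_EQ(data_blob_.num(), label_blob_.num()).
  hsize_t label_dims[1] = {num};
  H5LTmake_dataset_float(file_id, "label", 1, label_dims, label.data());

  H5Fclose(file_id);
  return 0;
}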
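
On the reading side, a sketch of the new call shape (again an annotation with a hypothetical file name; assumes the patched caffe headers are on the include path): one call now verifies datatype and rank, reshapes the blob, and fills its CPU buffer, where the old interface handed back a raw scoped_ptr array plus a separate dims vector.

#include "hdf5.h"

#include "caffe/blob.hpp"
#include "caffe/util/io.hpp"

int main() {
  caffe::Blob<float> data_blob;

  hid_t file_id = H5Fopen("sample.h5", H5F_ACC_RDONLY, H5P_DEFAULT);
  if (file_id < 0) return 1;

  // Reshapes data_blob to (num, channels, height, width); trailing
  // dimensions absent from the file are padded with 1 by the helper.
  caffe::hdf5_load_nd_dataset<float>(file_id, "data", 2, 4, data_blob);
  H5Fclose(file_id);

  // The blob now carries its own shape, so callers use num(), channels(),
  // height(), width() instead of consulting a parallel dims vector.
  return 0;
}

This is what lets SetUp in the patch drop data_dims_/label_dims_ entirely and size the top blobs from the Blob accessors.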