INCLUDE_DIRS += ./src ./include $(CUDA_INCLUDE_DIR) $(MKL_INCLUDE_DIR)
LIBRARY_DIRS += $(CUDA_LIB_DIR) $(MKL_LIB_DIR)
-LIBRARIES := cudart cublas curand mkl_rt pthread \
- glog protobuf leveldb snappy boost_system \
+LIBRARIES := cudart cublas curand \
+ mkl_rt \
+ pthread \
+ glog protobuf leveldb \
+ snappy \
+ boost_system \
+ hdf5 hdf5_hl \
opencv_core opencv_highgui opencv_imgproc
PYTHON_LIBRARIES := boost_python python2.7
WARNINGS := -Wall
#include <google/protobuf/message.h>
+#include <boost/scoped_ptr.hpp>
+#include "hdf5.h"
+#include "hdf5_hl.h"
+
#include <string>
#include "caffe/blob.hpp"
return ReadImageToDatum(filename, label, 0, 0, datum);
}
+// Read the 2-D dataset named dataset_name_ from an open HDF5 file handle,
+// allocate a fresh Dtype buffer into *array, and record its extents in
+// dims[0] (rows) and dims[1] (cols).
+// NOTE(review): the buffer is allocated with new[] (see the specializations
+// in io.cpp) but boost::scoped_ptr releases with delete, not delete[] —
+// boost::scoped_array looks required here; confirm before relying on cleanup.
+template <typename Dtype>
+void load_2d_dataset(
+    hid_t file_id, const char* dataset_name_,
+    boost::scoped_ptr<Dtype>* array, hsize_t* dims);
+
} // namespace caffe
#endif // CAFFE_UTIL_IO_H_
#include <leveldb/db.h>
#include <pthread.h>
+#include <boost/scoped_ptr.hpp>
+
+#include "hdf5.h"
#include <vector>
template <typename Dtype>
+// Data layer that serves minibatches from a 2-D HDF5 file holding a "data"
+// dataset and a row-aligned "label" dataset. The whole file is loaded into
+// host memory at SetUp and rows are emitted cyclically, batchsize at a time.
+class HDF5DataLayer : public Layer<Dtype> {
+ public:
+  explicit HDF5DataLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual ~HDF5DataLayer();
+  // Takes no bottom blobs; produces exactly two top blobs (data, label).
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  // Backward passes are no-ops: a data layer has nothing to propagate.
+  virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+
+  // Whole-file copies of the two datasets, row-major.
+  // NOTE(review): these receive new[]-allocated arrays (load_2d_dataset) but
+  // scoped_ptr destroys with delete — scoped_array would match; confirm.
+  boost::scoped_ptr<Dtype> data;
+  boost::scoped_ptr<Dtype> label;
+  hsize_t data_dims[2];   // rows, cols of the data dataset
+  hsize_t label_dims[2];  // rows, cols of the label dataset
+  hsize_t current_row;    // next row to emit; wraps at data_dims[0]
+};
+
+
+template <typename Dtype>
class SoftmaxLayer : public Layer<Dtype> {
public:
explicit SoftmaxLayer(const LayerParameter& param)
return new ConvolutionLayer<Dtype>(param);
} else if (type == "data") {
return new DataLayer<Dtype>(param);
+ } else if (type == "hdf5_data") {
+ return new HDF5DataLayer<Dtype>(param);
} else if (type == "dropout") {
return new DropoutLayer<Dtype>(param);
} else if (type == "euclidean_loss") {
--- /dev/null
+/*
+TODO:
+- only load parts of the file, in accordance with a prototxt param "max_mem"
+- How does Dtype affect the loading of the data, which is always float?
+*/
+
+#include <iostream>
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+#include "hdf5.h"
+#include "hdf5_hl.h"
+
+#include "caffe/layer.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/vision_layers.hpp"
+
+using std::string;
+
+namespace caffe {
+
+// Destructor: nothing to release explicitly — the scoped_ptr members own
+// the loaded datasets and free them on destruction.
+template <typename Dtype>
+HDF5DataLayer<Dtype>::~HDF5DataLayer<Dtype>() { }
+
+// Checks blob counts, loads the entire "data" and "label" 2-D datasets from
+// the HDF5 file named by layer_param_.source() into host memory, and shapes
+// the two top blobs to (batchsize, cols, 1, 1).
+template <typename Dtype>
+void HDF5DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  CHECK_EQ(bottom.size(), 0) << "HDF5DataLayer takes no input blobs.";
+  CHECK_EQ(top->size(), 2) << "HDF5DataLayer takes two blobs as output.";
+
+  // Load the HDF5 file and initialize the counter.
+  const char* hdf_filename = this->layer_param_.source().c_str();
+  LOG(INFO) << "Loading HDF5 file " << hdf_filename;
+  hid_t file_id = H5Fopen(hdf_filename, H5F_ACC_RDONLY, H5P_DEFAULT);
+  // H5Fopen returns a negative handle on failure; fail loudly instead of
+  // passing a bad handle to the dataset reads.
+  CHECK_GE(file_id, 0) << "Failed to open HDF5 file " << hdf_filename;
+  load_2d_dataset(file_id, "data", &data, data_dims);
+  load_2d_dataset(file_id, "label", &label, label_dims);
+  herr_t status = H5Fclose(file_id);
+  CHECK_GE(status, 0) << "Failed to close HDF5 file " << hdf_filename;
+  // Data and labels must pair up row for row. (CHECK, unlike assert, is not
+  // compiled out in release builds and matches this function's style.)
+  CHECK_EQ(data_dims[0], label_dims[0]);
+  current_row = 0;
+
+  // Each HDF5 row becomes one item of shape (cols, 1, 1).
+  (*top)[0]->Reshape(this->layer_param_.batchsize(), data_dims[1], 1, 1);
+  (*top)[1]->Reshape(this->layer_param_.batchsize(), label_dims[1], 1, 1);
+  LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
+      << (*top)[0]->channels() << "," << (*top)[0]->height() << ","
+      << (*top)[0]->width();
+}
+
+// Copies the next batchsize rows of data and label into the top blobs'
+// CPU buffers, wrapping back to row 0 when the end of the file is reached.
+template <typename Dtype>
+void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  const int batchsize = this->layer_param_.batchsize();
+  for (int i = 0; i < batchsize; ++i, ++current_row) {
+    // Cycle: restart from the first row once every row has been emitted.
+    if (current_row == data_dims[0]) {
+      current_row = 0;
+    }
+
+    // One row of data: data_dims[1] contiguous Dtype values.
+    memcpy(&(*top)[0]->mutable_cpu_data()[i * data_dims[1]],
+           &(data.get()[current_row * data_dims[1]]),
+           sizeof(Dtype) * data_dims[1]);
+
+    // Matching row of labels.
+    memcpy(&(*top)[1]->mutable_cpu_data()[i * label_dims[1]],
+           &(label.get()[current_row * label_dims[1]]),
+           sizeof(Dtype) * label_dims[1]);
+  }
+}
+
+// GPU twin of Forward_cpu: identical row-cycling logic, but each row is
+// copied host-to-device with cudaMemcpy (the datasets live in host memory).
+// NOTE(review): one cudaMemcpy per row per blob — batching the copy would
+// cut transfer overhead; presumably fine for small test data.
+template <typename Dtype>
+void HDF5DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  const int batchsize = this->layer_param_.batchsize();
+  for (int i = 0; i < batchsize; ++i, ++current_row) {
+    // Cycle: restart from the first row once every row has been emitted.
+    if (current_row == data_dims[0]) {
+      current_row = 0;
+    }
+
+    CUDA_CHECK(cudaMemcpy(
+        &(*top)[0]->mutable_gpu_data()[i * data_dims[1]],
+        &(data.get()[current_row * data_dims[1]]),
+        sizeof(Dtype) * data_dims[1],
+        cudaMemcpyHostToDevice));
+
+    CUDA_CHECK(cudaMemcpy(
+        &(*top)[1]->mutable_gpu_data()[i * label_dims[1]],
+        &(label.get()[current_row * label_dims[1]]),
+        sizeof(Dtype) * label_dims[1],
+        cudaMemcpyHostToDevice));
+  }
+}
+
+// The backward operations are dummy - they do not carry any computation.
+// A data layer is a source: there is no bottom blob to propagate into,
+// so the returned loss contribution is always zero.
+template <typename Dtype>
+Dtype HDF5DataLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  return Dtype(0.);
+}
+
+// GPU backward is equally a no-op; see Backward_cpu.
+template <typename Dtype>
+Dtype HDF5DataLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(HDF5DataLayer);
+
+} // namespace caffe
--- /dev/null
+"""
+Generate data used in the HDF5DataLayer test.
+"""
+
+import numpy as np
+import h5py
+
+num_cols = 8
+num_rows = 10
+data = np.arange(num_cols * num_rows).reshape(num_rows, num_cols)
+label = np.arange(num_rows)[:, np.newaxis]
+print data
+print label
+
+with h5py.File('./sample_data.h5', 'w') as f:
+ f['data'] = data.astype('float32')
+ f['label'] = label.astype('float32')
--- /dev/null
+// Copyright 2013 Yangqing Jia
+
+#include <cuda_runtime.h>
+#include <leveldb/db.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/proto/caffe.pb.h"
+#include "caffe/test/test_caffe_main.hpp"
+
+using std::string;
+
+namespace caffe {
+
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
+
+// Typed test fixture for HDF5DataLayer: owns the layer's two output blobs
+// and the path of the sample HDF5 file the layer reads.
+template <typename Dtype>
+class HDF5DataLayerTest : public ::testing::Test {
+ protected:
+  // Initializer order matches declaration order (filename first) to avoid
+  // -Wreorder; members are actually initialized in declaration order anyway.
+  HDF5DataLayerTest()
+      : filename(NULL),
+        blob_top_data_(new Blob<Dtype>()),
+        blob_top_label_(new Blob<Dtype>()) {}
+
+  virtual void SetUp() {
+    blob_top_vec_.push_back(blob_top_data_);
+    blob_top_vec_.push_back(blob_top_label_);
+
+    // TODO: generate sample HDF5 file on the fly.
+    // For now, use example HDF5 file.
+    // TODO: how to best deal with the relativeness of the path?
+    filename = "src/caffe/test/test_data/sample_data.h5";
+    LOG(INFO) << "Using sample HDF5 data file " << filename;
+  }
+
+  virtual ~HDF5DataLayerTest() {
+    delete blob_top_data_;
+    delete blob_top_label_;
+  }
+
+  // const char*: assigning a string literal to a plain char* is deprecated
+  // and ill-formed in C++11.
+  const char* filename;
+  Blob<Dtype>* const blob_top_data_;
+  Blob<Dtype>* const blob_top_label_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+typedef ::testing::Types<float, double> Dtypes;
+TYPED_TEST_CASE(HDF5DataLayerTest, Dtypes);
+
+// End-to-end read test: sets up the layer against the generated sample file
+// (10 rows x 8 cols of a 0..79 ramp, labels = row index), then verifies that
+// repeated Forward calls cycle through the file in order, in both CPU and
+// GPU modes.
+// NOTE(review): the CPU and GPU verification loops are identical
+// copy-paste; a shared helper would halve this test. Also, the mode is left
+// at Caffe::GPU on exit — presumably later tests reset it; confirm.
+TYPED_TEST(HDF5DataLayerTest, TestRead) {
+  // Create LayerParameter with the known parameters.
+  // The data file we are reading has 10 rows and 8 columns,
+  // with values from 0 to 10*8 reshaped in row-major order.
+  LayerParameter param;
+  int batchsize = 5;
+  param.set_batchsize(batchsize);
+  param.set_source(this->filename);
+  int num_rows = 10;
+  int num_cols = 8;
+  HDF5DataLayer<TypeParam> layer(param);
+
+  // Test that the layer setup got the correct parameters.
+  layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_);
+  EXPECT_EQ(this->blob_top_data_->num(), batchsize);
+  EXPECT_EQ(this->blob_top_data_->channels(), num_cols);
+  EXPECT_EQ(this->blob_top_data_->height(), 1);
+  EXPECT_EQ(this->blob_top_data_->width(), 1);
+
+  EXPECT_EQ(this->blob_top_label_->num(), batchsize);
+  EXPECT_EQ(this->blob_top_label_->channels(), 1);
+  EXPECT_EQ(this->blob_top_label_->height(), 1);
+  EXPECT_EQ(this->blob_top_label_->width(), 1);
+
+  // Go through the data 100 times.
+  for (int iter = 0; iter < 100; ++iter) {
+    layer.Forward(this->blob_bottom_vec_, &this->blob_top_vec_);
+
+    // On even iterations, we're reading the first half of the data.
+    // On odd iterations, we're reading the second half of the data.
+    // (batchsize 5 divides the 10 rows exactly, so the layer alternates
+    // halves and wraps cleanly.)
+    int label_offset = (iter % 2 == 0) ? 0 : batchsize;
+    int data_offset = (iter % 2 == 0) ? 0 : batchsize * num_cols;
+
+    for (int i = 0; i < batchsize; ++i) {
+      EXPECT_EQ(
+        label_offset + i,
+        this->blob_top_label_->cpu_data()[i]);
+    }
+    for (int i = 0; i < batchsize; ++i) {
+      for (int j = 0; j < num_cols; ++j) {
+        EXPECT_EQ(
+          data_offset + i * num_cols + j,
+          this->blob_top_data_->cpu_data()[i * num_cols + j])
+          << "debug: i " << i << " j " << j;
+      }
+    }
+  }
+
+  // Exact same test in GPU mode.
+  Caffe::set_mode(Caffe::GPU);
+  // Go through the data 100 times.
+  for (int iter = 0; iter < 100; ++iter) {
+    layer.Forward(this->blob_bottom_vec_, &this->blob_top_vec_);
+
+    // On even iterations, we're reading the first half of the data.
+    // On odd iterations, we're reading the second half of the data.
+    int label_offset = (iter % 2 == 0) ? 0 : batchsize;
+    int data_offset = (iter % 2 == 0) ? 0 : batchsize * num_cols;
+
+    for (int i = 0; i < batchsize; ++i) {
+      EXPECT_EQ(
+        label_offset + i,
+        this->blob_top_label_->cpu_data()[i]);
+    }
+    for (int i = 0; i < batchsize; ++i) {
+      for (int j = 0; j < num_cols; ++j) {
+        EXPECT_EQ(
+          data_offset + i * num_cols + j,
+          this->blob_top_data_->cpu_data()[i * num_cols + j])
+          << "debug: i " << i << " j " << j;
+      }
+    }
+  }
+}
+
+} // namespace caffe
return true;
}
+// Reads the 2-D float dataset dataset_name_ from file_id into a freshly
+// allocated buffer (*array) and stores its extents in dims[0]/dims[1].
+// NOTE(review): the new[] buffer is handed to boost::scoped_ptr, which
+// destroys with delete rather than delete[]; boost::scoped_array is the
+// matching holder, but fixing it needs a coordinated signature change.
+template <>
+void load_2d_dataset<float>(hid_t file_id, const char* dataset_name_,
+    boost::scoped_ptr<float>* array, hsize_t* dims) {
+  herr_t status;
+
+  int ndims;
+  status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
+  assert(status >= 0);
+  assert(ndims == 2);
+
+  H5T_class_t class_;
+  status = H5LTget_dataset_info(
+      file_id, dataset_name_, dims, &class_, NULL);
+  assert(status >= 0);
+  // class_ is an H5T_class_t enum; the old comparison against the hid_t
+  // handle H5T_NATIVE_FLOAT was a type mix-up. Floating-point datasets
+  // have class H5T_FLOAT.
+  assert(class_ == H5T_FLOAT);
+
+  array->reset(new float[dims[0] * dims[1]]);
+  status = H5LTread_dataset_float(
+      file_id, dataset_name_, array->get());
+  assert(status >= 0);
+}
+
+// Double-precision twin of load_2d_dataset<float>; see the notes there.
+template <>
+void load_2d_dataset<double>(hid_t file_id, const char* dataset_name_,
+    boost::scoped_ptr<double>* array, hsize_t* dims) {
+  herr_t status;
+
+  int ndims;
+  status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
+  assert(status >= 0);
+  assert(ndims == 2);
+
+  H5T_class_t class_;
+  status = H5LTget_dataset_info(
+      file_id, dataset_name_, dims, &class_, NULL);
+  assert(status >= 0);
+  // class_ is an H5T_class_t enum; H5T_NATIVE_DOUBLE is a hid_t handle, so
+  // the old comparison was a type mix-up. Both float and double datasets
+  // report class H5T_FLOAT.
+  assert(class_ == H5T_FLOAT);
+
+  array->reset(new double[dims[0] * dims[1]]);
+  status = H5LTread_dataset_double(
+      file_id, dataset_name_, array->get());
+  assert(status >= 0);
+}
+
} // namespace caffe