#include "caffe/blob.hpp"
#include "caffe/common.hpp"
+#include "caffe/data_transformer.hpp"
#include "caffe/filler.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
// TODO: DataLayer, ImageDataLayer, and WindowDataLayer all have the
// same basic structure and a lot of duplicated code.
-
template <typename Dtype>
class DataLayer : public Layer<Dtype>, public InternalThread {
public:
explicit DataLayer(const LayerParameter& param)
- : Layer<Dtype>(param) {}
+ : Layer<Dtype>(param),
+ data_transformer_(param.data_param().transform_param()) {}
virtual ~DataLayer();
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void CreatePrefetchThread();
virtual void JoinPrefetchThread();
- virtual unsigned int PrefetchRand();
// The thread's function
virtual void InternalThreadEntry();
- shared_ptr<Caffe::RNG> prefetch_rng_;
+ DataTransformer<Dtype> data_transformer_;
// LEVELDB
shared_ptr<leveldb::DB> db_;
--- /dev/null
+#ifndef CAFFE_DATA_TRANSFORMER_HPP
+#define CAFFE_DATA_TRANSFORMER_HPP
+
+#include "caffe/common.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+namespace caffe {
+
+/**
+ * @brief Applies common transformations to the input data, such as
+ * scaling, mirroring, subtracting the image mean...
+ */
+template <typename Dtype>
+class DataTransformer {
+ public:
+  explicit DataTransformer(const TransformationParameter& param)
+    : param_(param) {
+    // Capture the current phase (TRAIN/TEST); it controls whether random
+    // cropping/mirroring is applied in Transform().
+    phase_ = Caffe::phase();
+  }
+  virtual ~DataTransformer() {}
+
+  // (Re-)seeds rng_ when the current phase and parameters require
+  // randomness (TRAIN with mirror or crop_size set); otherwise releases it.
+  void InitRand();
+
+  /**
+   * @brief Applies the transformation defined in the data layer's
+   * transform_param block to the data.
+   *
+   * @param batch_item_id
+   *    Datum position within the batch. This is used to compute the
+   *    writing position in the top blob's data
+   * @param datum
+   *    Datum containing the data to be transformed.
+   * @param mean
+   *    Per-element mean to subtract, indexed like the datum's data.
+   * @param transformed_data
+   *    This is meant to be the top blob's data. The transformed data will be
+   *    written at the appropriate place within the blob's data.
+   */
+  void Transform(const int batch_item_id, const Datum& datum,
+                 const Dtype* mean, Dtype* transformed_data);
+
+ protected:
+  // Draws a random unsigned int from rng_; InitRand() must have created
+  // the generator first (CHECKed in the implementation).
+  virtual unsigned int Rand();
+
+  // Transformation parameters
+  TransformationParameter param_;
+
+
+  // RNG used for random crop offsets and mirror decisions.
+  shared_ptr<Caffe::RNG> rng_;
+  // Phase recorded at construction time (see constructor).
+  Caffe::Phase phase_;
+};
+
+}  // namespace caffe
+
+#endif  // CAFFE_DATA_TRANSFORMER_HPP
+
--- /dev/null
+#include <string>
+
+#include "caffe/data_transformer.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/util/rng.hpp"
+
+namespace caffe {
+
+template<typename Dtype>
+void DataTransformer<Dtype>::Transform(const int batch_item_id,
+                                       const Datum& datum,
+                                       const Dtype* mean,
+                                       Dtype* transformed_data) {
+  // Applies mean subtraction, scaling and (optionally) cropping/mirroring
+  // to one datum, writing the result into the batch_item_id-th slot of
+  // transformed_data (the top blob's buffer).
+  const string& data = datum.data();
+  const int channels = datum.channels();
+  const int height = datum.height();
+  const int width = datum.width();
+  const int size = datum.channels() * datum.height() * datum.width();
+
+  const int crop_size = param_.crop_size();
+  const bool mirror = param_.mirror();
+  const Dtype scale = param_.scale();
+
+  if (mirror && crop_size == 0) {
+    LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
+        << "set at the same time.";
+  }
+
+  if (crop_size) {
+    CHECK(data.size()) << "Image cropping only support uint8 data";
+    int h_off, w_off;
+    // We only do random crop when we do training.
+    if (phase_ == Caffe::TRAIN) {
+      // "+ 1" so that every valid offset in [0, height - crop_size] can be
+      // drawn; without it the bottom/right-most crop position is never
+      // sampled (and height == crop_size would be a modulo by zero).
+      h_off = Rand() % (height - crop_size + 1);
+      w_off = Rand() % (width - crop_size + 1);
+    } else {
+      // Deterministic center crop outside of training.
+      h_off = (height - crop_size) / 2;
+      w_off = (width - crop_size) / 2;
+    }
+    if (mirror && Rand() % 2) {
+      // Copy mirrored version: the output column is reversed
+      // (crop_size - 1 - w) relative to the source column.
+      for (int c = 0; c < channels; ++c) {
+        for (int h = 0; h < crop_size; ++h) {
+          for (int w = 0; w < crop_size; ++w) {
+            int data_index = (c * height + h + h_off) * width + w + w_off;
+            int top_index = ((batch_item_id * channels + c) * crop_size + h)
+                * crop_size + (crop_size - 1 - w);
+            Dtype datum_element =
+                static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
+            transformed_data[top_index] =
+                (datum_element - mean[data_index]) * scale;
+          }
+        }
+      }
+    } else {
+      // Normal copy
+      for (int c = 0; c < channels; ++c) {
+        for (int h = 0; h < crop_size; ++h) {
+          for (int w = 0; w < crop_size; ++w) {
+            int top_index = ((batch_item_id * channels + c) * crop_size + h)
+                * crop_size + w;
+            int data_index = (c * height + h + h_off) * width + w + w_off;
+            Dtype datum_element =
+                static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
+            transformed_data[top_index] =
+                (datum_element - mean[data_index]) * scale;
+          }
+        }
+      }
+    }
+  } else {
+    // No cropping: copy the whole datum.
+    // we will prefer to use data() first, and then try float_data()
+    if (data.size()) {
+      for (int j = 0; j < size; ++j) {
+        Dtype datum_element =
+            static_cast<Dtype>(static_cast<uint8_t>(data[j]));
+        transformed_data[j + batch_item_id * size] =
+            (datum_element - mean[j]) * scale;
+      }
+    } else {
+      for (int j = 0; j < size; ++j) {
+        transformed_data[j + batch_item_id * size] =
+            (datum.float_data(j) - mean[j]) * scale;
+      }
+    }
+  }
+}
+
+template <typename Dtype>
+void DataTransformer<Dtype>::InitRand() {
+  // Randomness is only needed while training with mirroring or cropping
+  // enabled; in every other configuration the generator is released.
+  if (phase_ != Caffe::TRAIN || !(param_.mirror() || param_.crop_size())) {
+    rng_.reset();
+    return;
+  }
+  const unsigned int seed = caffe_rng_rand();
+  rng_.reset(new Caffe::RNG(seed));
+}
+
+template <typename Dtype>
+unsigned int DataTransformer<Dtype>::Rand() {
+  // InitRand() must have created the generator before this is called.
+  CHECK(rng_);
+  caffe::rng_t* const generator =
+      static_cast<caffe::rng_t*>(rng_->generator());
+  return (*generator)();
+}
+
+INSTANTIATE_CLASS(DataTransformer);
+
+} // namespace caffe
if (output_labels_) {
top_label = prefetch_label_.mutable_cpu_data();
}
- const Dtype scale = this->layer_param_.data_param().scale();
const int batch_size = this->layer_param_.data_param().batch_size();
- const int crop_size = this->layer_param_.data_param().crop_size();
- const bool mirror = this->layer_param_.data_param().mirror();
- if (mirror && crop_size == 0) {
- LOG(FATAL) << "Current implementation requires mirror and crop_size to be "
- << "set at the same time.";
- }
- // datum scales
- const int channels = datum_channels_;
- const int height = datum_height_;
- const int width = datum_width_;
- const int size = datum_size_;
const Dtype* mean = data_mean_.cpu_data();
for (int item_id = 0; item_id < batch_size; ++item_id) {
// get a blob
LOG(FATAL) << "Unknown database backend";
}
- const string& data = datum.data();
- if (crop_size) {
- CHECK(data.size()) << "Image cropping only support uint8 data";
- int h_off, w_off;
- // We only do random crop when we do training.
- if (phase_ == Caffe::TRAIN) {
- h_off = PrefetchRand() % (height - crop_size);
- w_off = PrefetchRand() % (width - crop_size);
- } else {
- h_off = (height - crop_size) / 2;
- w_off = (width - crop_size) / 2;
- }
- if (mirror && PrefetchRand() % 2) {
- // Copy mirrored version
- for (int c = 0; c < channels; ++c) {
- for (int h = 0; h < crop_size; ++h) {
- for (int w = 0; w < crop_size; ++w) {
- int top_index = ((item_id * channels + c) * crop_size + h)
- * crop_size + (crop_size - 1 - w);
- int data_index = (c * height + h + h_off) * width + w + w_off;
- Dtype datum_element =
- static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
- top_data[top_index] = (datum_element - mean[data_index]) * scale;
- }
- }
- }
- } else {
- // Normal copy
- for (int c = 0; c < channels; ++c) {
- for (int h = 0; h < crop_size; ++h) {
- for (int w = 0; w < crop_size; ++w) {
- int top_index = ((item_id * channels + c) * crop_size + h)
- * crop_size + w;
- int data_index = (c * height + h + h_off) * width + w + w_off;
- Dtype datum_element =
- static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
- top_data[top_index] = (datum_element - mean[data_index]) * scale;
- }
- }
- }
- }
- } else {
- // we will prefer to use data() first, and then try float_data()
- if (data.size()) {
- for (int j = 0; j < size; ++j) {
- Dtype datum_element =
- static_cast<Dtype>(static_cast<uint8_t>(data[j]));
- top_data[item_id * size + j] = (datum_element - mean[j]) * scale;
- }
- } else {
- for (int j = 0; j < size; ++j) {
- top_data[item_id * size + j] =
- (datum.float_data(j) - mean[j]) * scale;
- }
- }
- }
+ // Apply data transformations (mirror, scale, crop...)
+ data_transformer_.Transform(item_id, datum, mean, top_data);
if (output_labels_) {
top_label[item_id] = datum.label();
}
+
// go to the next iter
switch (this->layer_param_.data_param().backend()) {
case DataParameter_DB_LEVELDB:
}
// image
- int crop_size = this->layer_param_.data_param().crop_size();
+ int crop_size = this->layer_param_.data_param().transform_param().crop_size();
if (crop_size > 0) {
(*top)[0]->Reshape(this->layer_param_.data_param().batch_size(),
datum.channels(), crop_size, crop_size);
CHECK_GT(datum_height_, crop_size);
CHECK_GT(datum_width_, crop_size);
// check if we want to have mean
- if (this->layer_param_.data_param().has_mean_file()) {
- const string& mean_file = this->layer_param_.data_param().mean_file();
+ if (this->layer_param_.data_param().transform_param().has_mean_file()) {
+ const string& mean_file =
+ this->layer_param_.data_param().transform_param().mean_file();
LOG(INFO) << "Loading mean file from" << mean_file;
BlobProto blob_proto;
ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
template <typename Dtype>
void DataLayer<Dtype>::CreatePrefetchThread() {
phase_ = Caffe::phase();
- const bool prefetch_needs_rand = (phase_ == Caffe::TRAIN) &&
- (this->layer_param_.data_param().mirror() ||
- this->layer_param_.data_param().crop_size());
- if (prefetch_needs_rand) {
- const unsigned int prefetch_rng_seed = caffe_rng_rand();
- prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
- } else {
- prefetch_rng_.reset();
- }
+
+ data_transformer_.InitRand();
+
CHECK(!StartInternalThread()) << "Pthread execution failed";
}
}
template <typename Dtype>
-unsigned int DataLayer<Dtype>::PrefetchRand() {
- CHECK(prefetch_rng_);
- caffe::rng_t* prefetch_rng =
- static_cast<caffe::rng_t*>(prefetch_rng_->generator());
- return (*prefetch_rng)();
-}
-
-template <typename Dtype>
void DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
// First, join the thread
optional FillerParameter bias_filler = 8; // The filler for the bias
}
+// Message that stores parameters used to apply transformation
+// to the data layer's data
+message TransformationParameter {
+  // For data pre-processing, we can do simple scaling and subtracting the
+  // data mean, if provided. Note that the mean subtraction is always carried
+  // out before scaling.
+  optional float scale = 1 [default = 1];
+  // Specify if we want to randomly mirror data.
+  optional bool mirror = 2 [default = false];
+  // Specify if we would like to randomly crop an image.
+  optional uint32 crop_size = 3 [default = 0];
+  // Path to a BlobProto file holding the per-element data mean to subtract.
+  optional string mean_file = 4;
+}
+
// Message that stores parameters used by DataLayer
message DataParameter {
enum DB {
}
// Specify the data source.
optional string source = 1;
- // For data pre-processing, we can do simple scaling and subtracting the
- // data mean, if provided. Note that the mean subtraction is always carried
- // out before scaling.
- optional float scale = 2 [default = 1];
- optional string mean_file = 3;
// Specify the batch size.
- optional uint32 batch_size = 4;
- // Specify if we would like to randomly crop an image.
- optional uint32 crop_size = 5 [default = 0];
- // Specify if we want to randomly mirror data.
- optional bool mirror = 6 [default = false];
+ optional uint32 batch_size = 3;
// The rand_skip variable is for the data layer to skip a few data points
// to avoid all asynchronous sgd clients to start at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the leveldb.
- optional uint32 rand_skip = 7 [default = 0];
- optional DB backend = 8 [default = LEVELDB];
+ optional uint32 rand_skip = 4 [default = 0];
+
+ // Parameters for data pre-processing.
+ optional TransformationParameter transform_param = 5;
+
+ optional DB backend = 6 [default = LEVELDB];
}
// Message that stores parameters used by DropoutLayer
LayerParameter param;
DataParameter* data_param = param.mutable_data_param();
data_param->set_batch_size(5);
- data_param->set_scale(scale);
data_param->set_source(filename_->c_str());
data_param->set_backend(backend_);
+
+ TransformationParameter* transform_param =
+ data_param->mutable_transform_param();
+ transform_param->set_scale(scale);
+
DataLayer<Dtype> layer(param);
layer.SetUp(blob_bottom_vec_, &blob_top_vec_);
EXPECT_EQ(blob_top_data_->num(), 5);
const Dtype scale = 3;
LayerParameter param;
Caffe::set_random_seed(1701);
+
DataParameter* data_param = param.mutable_data_param();
data_param->set_batch_size(5);
- data_param->set_scale(scale);
- data_param->set_crop_size(1);
data_param->set_source(filename_->c_str());
data_param->set_backend(backend_);
+
+ TransformationParameter* transform_param =
+ data_param->mutable_transform_param();
+ transform_param->set_scale(scale);
+ transform_param->set_crop_size(1);
+
DataLayer<Dtype> layer(param);
layer.SetUp(blob_bottom_vec_, &blob_top_vec_);
EXPECT_EQ(blob_top_data_->num(), 5);
LayerParameter param;
DataParameter* data_param = param.mutable_data_param();
data_param->set_batch_size(5);
- data_param->set_crop_size(1);
- data_param->set_mirror(true);
data_param->set_source(filename_->c_str());
data_param->set_backend(backend_);
+ TransformationParameter* transform_param =
+ data_param->mutable_transform_param();
+ transform_param->set_crop_size(1);
+ transform_param->set_mirror(true);
+
// Get crop sequence with Caffe seed 1701.
Caffe::set_random_seed(seed_);
vector<vector<Dtype> > crop_sequence;
LayerParameter param;
DataParameter* data_param = param.mutable_data_param();
data_param->set_batch_size(5);
- data_param->set_crop_size(1);
- data_param->set_mirror(true);
data_param->set_source(filename_->c_str());
data_param->set_backend(backend_);
+ TransformationParameter* transform_param =
+ data_param->mutable_transform_param();
+ transform_param->set_crop_size(1);
+ transform_param->set_mirror(true);
+
// Get crop sequence with Caffe seed 1701, srand seed 1701.
Caffe::set_random_seed(seed_);
srand(seed_);
" top: 'label' "
" data_param { "
" source: 'mnist-train-leveldb' "
- " scale: 0.00390625 "
+ " transform_param { "
+ " scale: 0.00390625 "
+ " } "
" batch_size: 64 "
" } "
" include: { phase: TRAIN } "
" top: 'label' "
" data_param { "
" source: 'mnist-test-leveldb' "
- " scale: 0.00390625 "
+ " transform_param { "
+ " scale: 0.00390625 "
+ " } "
" batch_size: 100 "
" } "
" include: { phase: TEST } "
" top: 'label' "
" data_param { "
" source: 'mnist-train-leveldb' "
- " scale: 0.00390625 "
+ " transform_param { "
+ " scale: 0.00390625 "
+ " } "
" batch_size: 64 "
" } "
" include: { phase: TRAIN } "
" top: 'label' "
" data_param { "
" source: 'mnist-test-leveldb' "
- " scale: 0.00390625 "
+ " transform_param { "
+ " scale: 0.00390625 "
+ " } "
" batch_size: 100 "
" } "
" include: { phase: TEST } "
" type: DATA "
" data_param { "
" source: '/home/jiayq/Data/ILSVRC12/train-leveldb' "
- " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' "
" batch_size: 256 "
- " crop_size: 227 "
- " mirror: true "
+ " transform_param { "
+ " crop_size: 227 "
+ " mirror: true "
+ " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' "
+ " } "
" } "
" top: 'data' "
" top: 'label' "
" type: DATA "
" data_param { "
" source: '/home/jiayq/Data/ILSVRC12/train-leveldb' "
- " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' "
" batch_size: 256 "
- " crop_size: 227 "
- " mirror: true "
+ " transform_param { "
+ " crop_size: 227 "
+ " mirror: true "
+ " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' "
+ " } "
" } "
" top: 'data' "
" top: 'label' "
" type: DATA "
" data_param { "
" source: '/home/jiayq/Data/ILSVRC12/train-leveldb' "
- " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' "
" batch_size: 256 "
- " crop_size: 227 "
- " mirror: true "
- " scale: 0.25 "
+ " transform_param { "
+ " crop_size: 227 "
+ " mirror: true "
+ " scale: 0.25 "
+ " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' "
+ " } "
" rand_skip: 73 "
" } "
" top: 'data' "
" type: DATA "
" data_param { "
" source: '/home/jiayq/Data/ILSVRC12/train-leveldb' "
- " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' "
" batch_size: 256 "
- " crop_size: 227 "
- " mirror: true "
+ " transform_param { "
+ " crop_size: 227 "
+ " mirror: true "
+ " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' "
+ " } "
" } "
" top: 'data' "
" top: 'label' "
}
if (v0_layer_param.has_scale()) {
if (type == "data") {
- layer_param->mutable_data_param()->set_scale(v0_layer_param.scale());
+ layer_param->mutable_data_param()->mutable_transform_param()->
+ set_scale(v0_layer_param.scale());
} else if (type == "images") {
layer_param->mutable_image_data_param()->set_scale(
v0_layer_param.scale());
}
if (v0_layer_param.has_meanfile()) {
if (type == "data") {
- layer_param->mutable_data_param()->set_mean_file(
- v0_layer_param.meanfile());
+ layer_param->mutable_data_param()->mutable_transform_param()->
+ set_mean_file(v0_layer_param.meanfile());
} else if (type == "images") {
layer_param->mutable_image_data_param()->set_mean_file(
v0_layer_param.meanfile());
}
if (v0_layer_param.has_cropsize()) {
if (type == "data") {
- layer_param->mutable_data_param()->set_crop_size(
- v0_layer_param.cropsize());
+ layer_param->mutable_data_param()->mutable_transform_param()->
+ set_crop_size(v0_layer_param.cropsize());
} else if (type == "images") {
layer_param->mutable_image_data_param()->set_crop_size(
v0_layer_param.cropsize());
}
if (v0_layer_param.has_mirror()) {
if (type == "data") {
- layer_param->mutable_data_param()->set_mirror(v0_layer_param.mirror());
+ layer_param->mutable_data_param()->mutable_transform_param()->
+ set_mirror(v0_layer_param.mirror());
} else if (type == "images") {
layer_param->mutable_image_data_param()->set_mirror(
v0_layer_param.mirror());