2 #include <opencv2/core/core.hpp>
8 #include "caffe/data_transformer.hpp"
9 #include "caffe/util/io.hpp"
10 #include "caffe/util/math_functions.hpp"
11 #include "caffe/util/rng.hpp"
// Constructs a DataTransformer: stores the transformation parameter in
// param_ and the phase in phase_, then preloads whichever mean source the
// parameter specifies (a mean file into data_mean_, or explicit mean values
// into mean_values_). Each branch CHECKs that the other mean source is not
// set, so the two are mutually exclusive.
// NOTE(review): the embedded line numbers skip values inside this definition
// (16 -> 18, 25 -> 28, 29 -> 31), so some original lines are not visible in
// this listing -- among them the second constructor parameter that feeds
// phase_, the declaration of blob_proto, and the closing braces.
15 template<typename Dtype>
16 DataTransformer<Dtype>::DataTransformer(const TransformationParameter& param,
18 : param_(param), phase_(phase) {
19 // check if we want to use mean_file
20 if (param_.has_mean_file()) {
21 CHECK_EQ(param_.mean_value_size(), 0) <<
22 "Cannot specify mean_file and mean_value at the same time";
23 const string& mean_file = param.mean_file();
// The informational message is emitted only when Caffe::root_solver() is true.
24 if (Caffe::root_solver()) {
25 LOG(INFO) << "Loading mean file from: " << mean_file;
// Read the file into blob_proto and populate data_mean_ from it.
// NOTE(review): the "OrDie" suffix suggests a failed read is fatal -- confirm
// against the helper's definition.
28 ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
29 data_mean_.FromProto(blob_proto);
31 // check if we want to use mean_value
32 if (param_.mean_value_size() > 0) {
33 CHECK(param_.has_mean_file() == false) <<
34 "Cannot specify mean_file and mean_value at the same time";
// Copy every configured mean value, in order, into mean_values_.
35 for (int c = 0; c < param_.mean_value_size(); ++c) {
36 mean_values_.push_back(param_.mean_value(c));
// Transforms a single Datum into the caller-provided buffer transformed_data.
// The visible code applies, per element: an optional square crop of side
// crop_size, an optional left-right mirror, subtraction of a mean (either the
// matching element of data_mean_ or the per-channel entry of mean_values_),
// and multiplication by scale.
// NOTE(review): the embedded line numbers skip values throughout this
// definition, so several original lines are not visible here -- e.g. the
// declarations of mean, h_off, w_off and datum_element, and the if/else lines
// that guard the mean-file checks, the crop, the mirror and the byte/float
// selection. Comments below that mention a guard describe what the
// surrounding visible code implies and should be confirmed.
41 template<typename Dtype>
42 void DataTransformer<Dtype>::Transform(const Datum& datum,
43 Dtype* transformed_data) {
44 const string& data = datum.data();
45 const int datum_channels = datum.channels();
46 const int datum_height = datum.height();
47 const int datum_width = datum.width();
49 const int crop_size = param_.crop_size();
50 const Dtype scale = param_.scale();
// Short-circuit &&: Rand(2) is only called when mirroring is enabled.
51 const bool do_mirror = param_.mirror() && Rand(2);
52 const bool has_mean_file = param_.has_mean_file();
// A non-empty data() string selects the byte source (data[...]) in the loop
// below; otherwise datum.float_data(...) is read.
53 const bool has_uint8 = data.size() > 0;
54 const bool has_mean_values = mean_values_.size() > 0;
56 CHECK_GT(datum_channels, 0);
57 CHECK_GE(datum_height, crop_size);
58 CHECK_GE(datum_width, crop_size);
// The mean blob must have the same channels/height/width as the datum because
// it is indexed with data_index below.
// NOTE(review): presumably guarded by has_mean_file -- guard line not visible.
62 CHECK_EQ(datum_channels, data_mean_.channels());
63 CHECK_EQ(datum_height, data_mean_.height());
64 CHECK_EQ(datum_width, data_mean_.width());
65 mean = data_mean_.mutable_cpu_data();
67 if (has_mean_values) {
68 CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels) <<
69 "Specify either 1 mean_value or as many as channels: " << datum_channels;
// Note: this appends to the member mean_values_, so the replication persists
// across later calls.
70 if (datum_channels > 1 && mean_values_.size() == 1) {
71 // Replicate the mean_value for simplicity
72 for (int c = 1; c < datum_channels; ++c) {
73 mean_values_.push_back(mean_values_[0]);
// Output dimensions start as the datum's own.
// NOTE(review): the lines that override them when cropping are not visible.
78 int height = datum_height;
79 int width = datum_width;
86 // We only do random crop when we do training.
// Rand(n) returns a value modulo n, so the random offsets lie in
// [0, dim - crop_size]; the other branch centres the crop.
87 if (phase_ == TRAIN) {
88 h_off = Rand(datum_height - crop_size + 1);
89 w_off = Rand(datum_width - crop_size + 1);
91 h_off = (datum_height - crop_size) / 2;
92 w_off = (datum_width - crop_size) / 2;
97 int top_index, data_index;
98 for (int c = 0; c < datum_channels; ++c) {
99 for (int h = 0; h < height; ++h) {
100 for (int w = 0; w < width; ++w) {
// data_index addresses the source in (channel, row, column) order using the
// datum's full dimensions, shifted by the crop offsets.
101 data_index = (c * datum_height + h_off + h) * datum_width + w_off + w;
// top_index addresses the output in the same order using the output
// dimensions; the first form reverses the column (mirror), the second keeps it.
103 top_index = (c * height + h) * width + (width - 1 - w);
105 top_index = (c * height + h) * width + w;
// Bytes are cast through uint8_t before conversion to Dtype.
109 static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
111 datum_element = datum.float_data(data_index);
// Mean-file case: subtract the mean element at the same source position.
114 transformed_data[top_index] =
115 (datum_element - mean[data_index]) * scale;
// Mean-values case: subtract the per-channel mean; otherwise only scale.
117 if (has_mean_values) {
118 transformed_data[top_index] =
119 (datum_element - mean_values_[c]) * scale;
121 transformed_data[top_index] = datum_element * scale;
// Transforms a single Datum into transformed_blob.
// An encoded datum is decoded to a cv image and forwarded to the cv-image
// overload of Transform. Otherwise the blob's shape is checked against the
// datum and the work is delegated to the raw-pointer overload, writing into
// transformed_blob->mutable_cpu_data().
// NOTE(review): the embedded line numbers skip values in this definition, so
// some original lines are not visible here -- e.g. the declaration of cv_img,
// any preprocessor guards around the decoding code, the else lines, and the
// condition that chooses between the two pairs of size checks further down.
130 template<typename Dtype>
131 void DataTransformer<Dtype>::Transform(const Datum& datum,
132 Blob<Dtype>* transformed_blob) {
133 // If datum is encoded, decode and transform the cv::image.
134 if (datum.encoded()) {
136 CHECK(!(param_.force_color() && param_.force_gray()))
137 << "cannot set both force_color and force_gray";
139 if (param_.force_color() || param_.force_gray()) {
140 // If force_color then decode in color otherwise decode in gray.
141 cv_img = DecodeDatumToCVMat(datum, param_.force_color());
143 cv_img = DecodeDatumToCVMatNative(datum);
145 // Transform the cv::image into blob.
146 return Transform(cv_img, transformed_blob);
// NOTE(review): per its message this is the path taken when OpenCV support
// is not compiled in; the selecting preprocessor lines are not visible.
148 LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV.";
// The force_* options are reported via LOG(ERROR) when the datum is not encoded.
151 if (param_.force_color() || param_.force_gray()) {
152 LOG(ERROR) << "force_color and force_gray only for encoded datum";
156 const int crop_size = param_.crop_size();
157 const int datum_channels = datum.channels();
158 const int datum_height = datum.height();
159 const int datum_width = datum.width();
162 const int channels = transformed_blob->channels();
163 const int height = transformed_blob->height();
164 const int width = transformed_blob->width();
// NOTE(review): no use of num is visible in this listing.
165 const int num = transformed_blob->num();
// The blob must have the datum's channel count and be no larger spatially.
167 CHECK_EQ(channels, datum_channels);
168 CHECK_LE(height, datum_height);
169 CHECK_LE(width, datum_width);
// Two alternative requirements on the blob's spatial size: equal to crop_size,
// or equal to the datum's own size.
// NOTE(review): presumably selected by whether crop_size is non-zero --
// the selecting if/else lines are not visible.
173 CHECK_EQ(crop_size, height);
174 CHECK_EQ(crop_size, width);
176 CHECK_EQ(datum_height, height);
177 CHECK_EQ(datum_width, width);
// Delegate to the raw-pointer overload, writing into the blob's CPU data.
180 Dtype* transformed_data = transformed_blob->mutable_cpu_data();
181 Transform(datum, transformed_data);
// Transforms each Datum of datum_vector into the item of transformed_blob
// with the same index. The vector must be non-empty and may hold fewer
// datums than the blob has items (CHECK_LE).
// NOTE(review): the embedded line numbers skip values here (e.g. 200 -> 205),
// so closing braces and possibly other lines are not visible.
184 template<typename Dtype>
185 void DataTransformer<Dtype>::Transform(const vector<Datum> & datum_vector,
186 Blob<Dtype>* transformed_blob) {
187 const int datum_num = datum_vector.size();
188 const int num = transformed_blob->num();
189 const int channels = transformed_blob->channels();
190 const int height = transformed_blob->height();
191 const int width = transformed_blob->width();
193 CHECK_GT(datum_num, 0) << "There is no datum to add";
194 CHECK_LE(datum_num, num) <<
195 "The size of datum_vector must be no greater than transformed_blob->num()";
// uni_blob is a one-item blob with the target's channels/height/width. Inside
// the loop its data pointer is set to the start of item item_id within
// transformed_blob, so the single-datum overload writes directly into the
// target blob.
196 Blob<Dtype> uni_blob(1, channels, height, width);
197 for (int item_id = 0; item_id < datum_num; ++item_id) {
198 int offset = transformed_blob->offset(item_id);
199 uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset);
200 Transform(datum_vector[item_id], &uni_blob);
// Transforms each cv::Mat of mat_vector into the item of transformed_blob
// with the same index. The vector must be non-empty and its size must equal
// transformed_blob->num() (CHECK_EQ).
// NOTE(review): the embedded line numbers skip values here (e.g. 221 -> 225),
// so closing braces and possibly other lines (such as preprocessor guards)
// are not visible.
205 template<typename Dtype>
206 void DataTransformer<Dtype>::Transform(const vector<cv::Mat> & mat_vector,
207 Blob<Dtype>* transformed_blob) {
208 const int mat_num = mat_vector.size();
209 const int num = transformed_blob->num();
210 const int channels = transformed_blob->channels();
211 const int height = transformed_blob->height();
212 const int width = transformed_blob->width();
214 CHECK_GT(mat_num, 0) << "There is no MAT to add";
215 CHECK_EQ(mat_num, num) <<
216 "The size of mat_vector must be equals to transformed_blob->num()";
// uni_blob is a one-item blob with the target's channels/height/width. Inside
// the loop its data pointer is set to the start of item item_id within
// transformed_blob, so the single-image overload writes directly into the
// target blob.
217 Blob<Dtype> uni_blob(1, channels, height, width);
218 for (int item_id = 0; item_id < mat_num; ++item_id) {
219 int offset = transformed_blob->offset(item_id);
220 uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset);
221 Transform(mat_vector[item_id], &uni_blob);
// Transforms a single cv::Mat image into transformed_blob.
// The visible code requires an unsigned-byte image (CV_8U), optionally takes
// a crop_size x crop_size region of it, optionally mirrors left-right,
// subtracts a mean (element of data_mean_ or per-channel mean_values_ entry)
// and multiplies by scale. Pixels are read from the image row by row with the
// channel as the innermost index and written to the blob indexed as
// (channel, row, column).
// NOTE(review): the embedded line numbers skip values throughout this
// definition, so several original lines are not visible here -- e.g. the
// declarations of mean, h_off, w_off, img_index and top_index, and the
// if/else lines guarding the mean-file checks, the crop and the mirror.
// Comments below that mention a guard should be confirmed.
225 template<typename Dtype>
226 void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
227 Blob<Dtype>* transformed_blob) {
228 const int crop_size = param_.crop_size();
229 const int img_channels = cv_img.channels();
230 const int img_height = cv_img.rows;
231 const int img_width = cv_img.cols;
234 const int channels = transformed_blob->channels();
235 const int height = transformed_blob->height();
236 const int width = transformed_blob->width();
// NOTE(review): no use of num is visible in this listing.
237 const int num = transformed_blob->num();
// The blob must have the image's channel count and be no larger spatially.
239 CHECK_EQ(channels, img_channels);
240 CHECK_LE(height, img_height);
241 CHECK_LE(width, img_width);
// The pixel loop below reads the rows as uchar, hence the depth requirement.
244 CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte";
246 const Dtype scale = param_.scale();
// Short-circuit &&: Rand(2) is only called when mirroring is enabled.
247 const bool do_mirror = param_.mirror() && Rand(2);
248 const bool has_mean_file = param_.has_mean_file();
249 const bool has_mean_values = mean_values_.size() > 0;
251 CHECK_GT(img_channels, 0);
252 CHECK_GE(img_height, crop_size);
253 CHECK_GE(img_width, crop_size);
// The mean blob must match the uncropped image because mean_index below is
// computed with the image's full dimensions.
// NOTE(review): presumably guarded by has_mean_file -- guard line not visible.
257 CHECK_EQ(img_channels, data_mean_.channels());
258 CHECK_EQ(img_height, data_mean_.height());
259 CHECK_EQ(img_width, data_mean_.width());
260 mean = data_mean_.mutable_cpu_data();
262 if (has_mean_values) {
263 CHECK(mean_values_.size() == 1 || mean_values_.size() == img_channels) <<
264 "Specify either 1 mean_value or as many as channels: " << img_channels;
// Note: this appends to the member mean_values_, so the replication persists
// across later calls.
265 if (img_channels > 1 && mean_values_.size() == 1) {
266 // Replicate the mean_value for simplicity
267 for (int c = 1; c < img_channels; ++c) {
268 mean_values_.push_back(mean_values_[0]);
// Defaults to the whole image; reassigned to the cropped region below.
275 cv::Mat cv_cropped_img = cv_img;
// Cropping path: the blob must be exactly crop_size x crop_size.
// NOTE(review): presumably guarded by a non-zero crop_size -- guard not visible.
277 CHECK_EQ(crop_size, height);
278 CHECK_EQ(crop_size, width);
279 // We only do random crop when we do training.
// Rand(n) returns a value modulo n, so the random offsets lie in
// [0, dim - crop_size]; the other branch centres the crop.
280 if (phase_ == TRAIN) {
281 h_off = Rand(img_height - crop_size + 1);
282 w_off = Rand(img_width - crop_size + 1);
284 h_off = (img_height - crop_size) / 2;
285 w_off = (img_width - crop_size) / 2;
// Select the crop_size x crop_size region whose top-left corner is
// (w_off, h_off).
287 cv::Rect roi(w_off, h_off, crop_size, crop_size);
288 cv_cropped_img = cv_img(roi);
// Non-cropping path: the blob must match the image size exactly.
290 CHECK_EQ(img_height, height);
291 CHECK_EQ(img_width, width);
294 CHECK(cv_cropped_img.data);
296 Dtype* transformed_data = transformed_blob->mutable_cpu_data();
298 for (int h = 0; h < height; ++h) {
// Pointer to row h of the (possibly cropped) image.
299 const uchar* ptr = cv_cropped_img.ptr<uchar>(h);
301 for (int w = 0; w < width; ++w) {
302 for (int c = 0; c < img_channels; ++c) {
// Output index in (channel, row, column) order; the first form reverses the
// column (mirror), the second keeps it.
304 top_index = (c * height + h) * width + (width - 1 - w);
306 top_index = (c * height + h) * width + w;
308 // int top_index = (c * height + h) * width + w;
// img_index advances once per (w, c) step, i.e. the row is consumed
// sequentially with channels interleaved per pixel.
// NOTE(review): the declaration/reset of img_index is not visible.
309 Dtype pixel = static_cast<Dtype>(ptr[img_index++]);
// The mean is sampled at the pixel's position in the uncropped image, hence
// the h_off / w_off shifts and the use of img_height / img_width.
311 int mean_index = (c * img_height + h_off + h) * img_width + w_off + w;
312 transformed_data[top_index] =
313 (pixel - mean[mean_index]) * scale;
// Mean-values case: subtract the per-channel mean; otherwise only scale.
315 if (has_mean_values) {
316 transformed_data[top_index] =
317 (pixel - mean_values_[c]) * scale;
319 transformed_data[top_index] = pixel * scale;
// Transforms all items of input_blob into transformed_blob.
// Visible steps: shape transformed_blob if it is empty, validate shapes,
// subtract the mean (mean file or mean values) from input_blob's data, copy
// the (optionally cropped, optionally mirrored) region of every item into
// transformed_blob, and finally multiply the result by scale when scale != 1.
// Note that the mean subtraction passes input_data as the destination of
// caffe_sub / caffe_add_scalar, so input_blob itself is modified.
// NOTE(review): the embedded line numbers skip values throughout this
// definition, so several original lines are not visible here -- e.g. the
// declarations of h_off / w_off and the if/else lines guarding the reshape
// variants, the crop, the mean-file branch and the mirror branch. Comments
// below that mention a guard should be confirmed.
328 template<typename Dtype>
329 void DataTransformer<Dtype>::Transform(Blob<Dtype>* input_blob,
330 Blob<Dtype>* transformed_blob) {
331 const int crop_size = param_.crop_size();
332 const int input_num = input_blob->num();
333 const int input_channels = input_blob->channels();
334 const int input_height = input_blob->height();
335 const int input_width = input_blob->width();
337 if (transformed_blob->count() == 0) {
338 // Initialize transformed_blob with the right shape.
// Two alternative shapes: crop_size x crop_size, or the input's own size.
// NOTE(review): presumably chosen by whether crop_size is non-zero --
// the selecting if/else lines are not visible.
340 transformed_blob->Reshape(input_num, input_channels,
341 crop_size, crop_size);
343 transformed_blob->Reshape(input_num, input_channels,
344 input_height, input_width);
348 const int num = transformed_blob->num();
349 const int channels = transformed_blob->channels();
350 const int height = transformed_blob->height();
351 const int width = transformed_blob->width();
352 const int size = transformed_blob->count();
// The output must have room for every input item, the same channel count,
// and a spatial size no larger than the input's.
354 CHECK_LE(input_num, num);
355 CHECK_EQ(input_channels, channels);
356 CHECK_GE(input_height, height);
357 CHECK_GE(input_width, width);
360 const Dtype scale = param_.scale();
// Short-circuit &&: Rand(2) is only called when mirroring is enabled. The
// decision is made once and applies to every item in the copy loop below.
361 const bool do_mirror = param_.mirror() && Rand(2);
362 const bool has_mean_file = param_.has_mean_file();
363 const bool has_mean_values = mean_values_.size() > 0;
// Cropping path: the output must be exactly crop_size x crop_size.
368 CHECK_EQ(crop_size, height);
369 CHECK_EQ(crop_size, width);
370 // We only do random crop when we do training.
// Rand(n) returns a value modulo n, so the random offsets lie in
// [0, dim - crop_size]; the other branch centres the crop. The offsets are
// computed once and shared by all items.
371 if (phase_ == TRAIN) {
372 h_off = Rand(input_height - crop_size + 1);
373 w_off = Rand(input_width - crop_size + 1);
375 h_off = (input_height - crop_size) / 2;
376 w_off = (input_width - crop_size) / 2;
// Non-cropping path: the output must match the input size exactly.
379 CHECK_EQ(input_height, height);
380 CHECK_EQ(input_width, width);
383 Dtype* input_data = input_blob->mutable_cpu_data();
// Mean-file case: the mean blob must match one input item, then it is
// subtracted from each item with input_data as the destination.
// NOTE(review): presumably guarded by has_mean_file -- guard line not visible.
385 CHECK_EQ(input_channels, data_mean_.channels());
386 CHECK_EQ(input_height, data_mean_.height());
387 CHECK_EQ(input_width, data_mean_.width());
388 for (int n = 0; n < input_num; ++n) {
389 int offset = input_blob->offset(n);
390 caffe_sub(data_mean_.count(), input_data + offset,
391 data_mean_.cpu_data(), input_data + offset);
395 if (has_mean_values) {
396 CHECK(mean_values_.size() == 1 || mean_values_.size() == input_channels) <<
397 "Specify either 1 mean_value or as many as channels: " << input_channels;
// A single mean value is applied to the whole blob in one call; otherwise
// each channel plane of each item gets its own value.
398 if (mean_values_.size() == 1) {
399 caffe_add_scalar(input_blob->count(), -(mean_values_[0]), input_data);
401 for (int n = 0; n < input_num; ++n) {
402 for (int c = 0; c < input_channels; ++c) {
403 int offset = input_blob->offset(n, c);
404 caffe_add_scalar(input_height * input_width, -(mean_values_[c]),
405 input_data + offset);
411 Dtype* transformed_data = transformed_blob->mutable_cpu_data();
// Copy loop. Indices are built incrementally: *_n counts channel planes up to
// item n, *_c counts rows up to channel c, *_h is the element index of the
// start of row h. The source side uses the input's dimensions plus the crop
// offsets; the destination side uses the output's dimensions.
413 for (int n = 0; n < input_num; ++n) {
414 int top_index_n = n * channels;
415 int data_index_n = n * channels;
416 for (int c = 0; c < channels; ++c) {
417 int top_index_c = (top_index_n + c) * height;
418 int data_index_c = (data_index_n + c) * input_height + h_off;
419 for (int h = 0; h < height; ++h) {
420 int top_index_h = (top_index_c + h) * width;
421 int data_index_h = (data_index_c + h) * input_width + w_off;
// Mirrored copy: write the row right-to-left, starting from its last element.
423 int top_index_w = top_index_h + width - 1;
424 for (int w = 0; w < width; ++w) {
425 transformed_data[top_index_w-w] = input_data[data_index_h + w];
// Plain copy: write the row left-to-right.
428 for (int w = 0; w < width; ++w) {
429 transformed_data[top_index_h + w] = input_data[data_index_h + w];
// Scaling is skipped when scale equals 1; otherwise it is applied to all
// `size` elements of transformed_blob.
435 if (scale != Dtype(1)) {
436 DLOG(INFO) << "Scale: " << scale;
437 caffe_scal(size, scale, transformed_data);
// Infers the 4-element blob shape needed to hold the transformed datum.
// An encoded datum is decoded to a cv image and the cv-image overload is
// used. Otherwise shape[1] is the datum's channel count, and shape[2] and
// shape[3] are crop_size when it is non-zero, else the datum's height and
// width.
// NOTE(review): the embedded line numbers skip values here (e.g. 469 -> 471,
// 473 -> 477), so some original lines are not visible -- among them the
// declaration of cv_img, any preprocessor guards, the assignment of shape[0]
// and the return statement.
441 template<typename Dtype>
442 vector<int> DataTransformer<Dtype>::InferBlobShape(const Datum& datum) {
443 if (datum.encoded()) {
445 CHECK(!(param_.force_color() && param_.force_gray()))
446 << "cannot set both force_color and force_gray";
448 if (param_.force_color() || param_.force_gray()) {
449 // If force_color then decode in color otherwise decode in gray.
450 cv_img = DecodeDatumToCVMat(datum, param_.force_color());
452 cv_img = DecodeDatumToCVMatNative(datum);
454 // InferBlobShape using the cv::image.
455 return InferBlobShape(cv_img);
// NOTE(review): per its message this is the path taken when OpenCV support
// is not compiled in; the selecting preprocessor lines are not visible.
457 LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV.";
460 const int crop_size = param_.crop_size();
461 const int datum_channels = datum.channels();
462 const int datum_height = datum.height();
463 const int datum_width = datum.width();
// The datum must have at least one channel and be at least crop_size in both
// spatial dimensions.
465 CHECK_GT(datum_channels, 0);
466 CHECK_GE(datum_height, crop_size);
467 CHECK_GE(datum_width, crop_size);
469 vector<int> shape(4);
471 shape[1] = datum_channels;
// A crop_size of zero means "no crop": keep the datum's own size.
472 shape[2] = (crop_size)? crop_size: datum_height;
473 shape[3] = (crop_size)? crop_size: datum_width;
// Infers the blob shape for a vector of datums from its first element; the
// vector must be non-empty.
// NOTE(review): the embedded line numbers skip values here (484 -> 490), so
// the lines that adjust the shape to the vector size and return it are not
// visible in this listing.
477 template<typename Dtype>
478 vector<int> DataTransformer<Dtype>::InferBlobShape(
479 const vector<Datum> & datum_vector) {
480 const int num = datum_vector.size();
481 CHECK_GT(num, 0) << "There is no datum to in the vector";
482 // Use first datum in the vector to InferBlobShape.
483 vector<int> shape = InferBlobShape(datum_vector[0]);
484 // Adjust num to the size of the vector.
// Infers the 4-element blob shape needed to hold the transformed image:
// shape[1] is the image's channel count, and shape[2] and shape[3] are
// crop_size when it is non-zero, else the image's rows and cols.
// NOTE(review): the embedded line numbers skip values here (e.g. 501 -> 503,
// 505 -> 509), so the assignment of shape[0] and the return statement are not
// visible in this listing.
490 template<typename Dtype>
491 vector<int> DataTransformer<Dtype>::InferBlobShape(const cv::Mat& cv_img) {
492 const int crop_size = param_.crop_size();
493 const int img_channels = cv_img.channels();
494 const int img_height = cv_img.rows;
495 const int img_width = cv_img.cols;
// The image must have at least one channel and be at least crop_size in both
// spatial dimensions.
497 CHECK_GT(img_channels, 0);
498 CHECK_GE(img_height, crop_size);
499 CHECK_GE(img_width, crop_size);
501 vector<int> shape(4);
503 shape[1] = img_channels;
// A crop_size of zero means "no crop": keep the image's own size.
504 shape[2] = (crop_size)? crop_size: img_height;
505 shape[3] = (crop_size)? crop_size: img_width;
// Infers the blob shape for a vector of cv::Mat images from its first
// element; the vector must be non-empty.
// NOTE(review): the embedded line numbers skip values here (516 -> 522), so
// the lines that adjust the shape to the vector size and return it are not
// visible in this listing.
509 template<typename Dtype>
510 vector<int> DataTransformer<Dtype>::InferBlobShape(
511 const vector<cv::Mat> & mat_vector) {
512 const int num = mat_vector.size();
513 CHECK_GT(num, 0) << "There is no cv_img to in the vector";
514 // Use first cv_img in the vector to InferBlobShape.
515 vector<int> shape = InferBlobShape(mat_vector[0]);
516 // Adjust num to the size of the vector.
// Sets up the random number generator rng_ used by Rand().
// Randomness is considered necessary when mirroring is enabled, or when the
// phase is TRAIN and crop_size is non-zero (the random-crop case).
// NOTE(review): the embedded line numbers skip values here (525 -> 527,
// 528 -> 534), so the guard that presumably tests needs_rand, and whatever
// happens when it is false, are not visible in this listing.
522 template <typename Dtype>
523 void DataTransformer<Dtype>::InitRand() {
524 const bool needs_rand = param_.mirror() ||
525 (phase_ == TRAIN && param_.crop_size());
// Seed a fresh Caffe::RNG with a value drawn from caffe_rng_rand().
527 const unsigned int rng_seed = caffe_rng_rand();
528 rng_.reset(new Caffe::RNG(rng_seed));
// Returns the next value of the generator held by rng_, reduced modulo n, so
// the result lies in [0, n - 1] for positive n.
// NOTE(review): the embedded line numbers skip values here (535 -> 539), so
// the start of the declaration of `rng` and any precondition checks on rng_
// or n are not visible in this listing.
534 template <typename Dtype>
535 int DataTransformer<Dtype>::Rand(int n) {
539 static_cast<caffe::rng_t*>(rng_->generator());
540 return ((*rng)() % n);
// NOTE(review): INSTANTIATE_CLASS is a macro defined outside this listing;
// presumably it emits the explicit template instantiations of DataTransformer
// for the supported Dtype types -- confirm against its definition.
543 INSTANTIATE_CLASS(DataTransformer);