src/caffe/data_transformer.cpp

   1 #ifdef USE_OPENCV
   2 #include <opencv2/core/core.hpp>
   3 #endif  // USE_OPENCV
   4
   5 #include <string>
   6 #include <vector>
   7
   8 #include "caffe/data_transformer.hpp"
   9 #include "caffe/util/io.hpp"
  10 #include "caffe/util/math_functions.hpp"
  11 #include "caffe/util/rng.hpp"
  12
  13 namespace caffe {
  14
  15 template<typename Dtype>
  16 DataTransformer<Dtype>::DataTransformer(const TransformationParameter& param,
  17     Phase phase)
  18     : param_(param), phase_(phase) {
  19   // check if we want to use mean_file
  20   if (param_.has_mean_file()) {
  21     CHECK_EQ(param_.mean_value_size(), 0) <<
  22       "Cannot specify mean_file and mean_value at the same time";
  23     const string& mean_file = param.mean_file();
  24     if (Caffe::root_solver()) {
  25       LOG(INFO) << "Loading mean file from: " << mean_file;
  26     }
  27     BlobProto blob_proto;
  28     ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
  29     data_mean_.FromProto(blob_proto);
  30   }
  31   // check if we want to use mean_value
  32   if (param_.mean_value_size() > 0) {
  33     CHECK(param_.has_mean_file() == false) <<
  34       "Cannot specify mean_file and mean_value at the same time";
  35     for (int c = 0; c < param_.mean_value_size(); ++c) {
  36       mean_values_.push_back(param_.mean_value(c));
  37     }
  38   }
  39 }
  40
  41 template<typename Dtype>
  42 void DataTransformer<Dtype>::Transform(const Datum& datum,
  43                                        Dtype* transformed_data) {
  44   const string& data = datum.data();
  45   const int datum_channels = datum.channels();
  46   const int datum_height = datum.height();
  47   const int datum_width = datum.width();
  48
  49   const int crop_size = param_.crop_size();
  50   const Dtype scale = param_.scale();
  51   const bool do_mirror = param_.mirror() && Rand(2);
  52   const bool has_mean_file = param_.has_mean_file();
  53   const bool has_uint8 = data.size() > 0;
  54   const bool has_mean_values = mean_values_.size() > 0;
  55
  56   CHECK_GT(datum_channels, 0);
  57   CHECK_GE(datum_height, crop_size);
  58   CHECK_GE(datum_width, crop_size);
  59
  60   Dtype* mean = NULL;
  61   if (has_mean_file) {
  62     CHECK_EQ(datum_channels, data_mean_.channels());
  63     CHECK_EQ(datum_height, data_mean_.height());
  64     CHECK_EQ(datum_width, data_mean_.width());
  65     mean = data_mean_.mutable_cpu_data();
  66   }
  67   if (has_mean_values) {
  68     CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels) <<
  69      "Specify either 1 mean_value or as many as channels: " << datum_channels;
  70     if (datum_channels > 1 && mean_values_.size() == 1) {
  71       // Replicate the mean_value for simplicity
  72       for (int c = 1; c < datum_channels; ++c) {
  73         mean_values_.push_back(mean_values_[0]);
  74       }
  75     }
  76   }
  77
  78   int height = datum_height;
  79   int width = datum_width;
  80
  81   int h_off = 0;
  82   int w_off = 0;
  83   if (crop_size) {
  84     height = crop_size;
  85     width = crop_size;
  86     // We only do random crop when we do training.
  87     if (phase_ == TRAIN) {
  88       h_off = Rand(datum_height - crop_size + 1);
  89       w_off = Rand(datum_width - crop_size + 1);
  90     } else {
  91       h_off = (datum_height - crop_size) / 2;
  92       w_off = (datum_width - crop_size) / 2;
  93     }
  94   }
  95
  96   Dtype datum_element;
  97   int top_index, data_index;
  98   for (int c = 0; c < datum_channels; ++c) {
  99     for (int h = 0; h < height; ++h) {
 100       for (int w = 0; w < width; ++w) {
 101         data_index = (c * datum_height + h_off + h) * datum_width + w_off + w;
 102         if (do_mirror) {
 103           top_index = (c * height + h) * width + (width - 1 - w);
 104         } else {
 105           top_index = (c * height + h) * width + w;
 106         }
 107         if (has_uint8) {
 108           datum_element =
 109             static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
 110         } else {
 111           datum_element = datum.float_data(data_index);
 112         }
 113         if (has_mean_file) {
 114           transformed_data[top_index] =
 115             (datum_element - mean[data_index]) * scale;
 116         } else {
 117           if (has_mean_values) {
 118             transformed_data[top_index] =
 119               (datum_element - mean_values_[c]) * scale;
 120           } else {
 121             transformed_data[top_index] = datum_element * scale;
 122           }
 123         }
 124       }
 125     }
 126   }
 127 }
 128
 129
 130 template<typename Dtype>
 131 void DataTransformer<Dtype>::Transform(const Datum& datum,
 132                                        Blob<Dtype>* transformed_blob) {
 133   // If datum is encoded, decode and transform the cv::image.
 134   if (datum.encoded()) {
 135 #ifdef USE_OPENCV
 136     CHECK(!(param_.force_color() && param_.force_gray()))
 137         << "cannot set both force_color and force_gray";
 138     cv::Mat cv_img;
 139     if (param_.force_color() || param_.force_gray()) {
 140     // If force_color then decode in color otherwise decode in gray.
 141       cv_img = DecodeDatumToCVMat(datum, param_.force_color());
 142     } else {
 143       cv_img = DecodeDatumToCVMatNative(datum);
 144     }
 145     // Transform the cv::image into blob.
 146     return Transform(cv_img, transformed_blob);
 147 #else
 148     LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV.";
 149 #endif  // USE_OPENCV
 150   } else {
 151     if (param_.force_color() || param_.force_gray()) {
 152       LOG(ERROR) << "force_color and force_gray only for encoded datum";
 153     }
 154   }
 155
 156   const int crop_size = param_.crop_size();
 157   const int datum_channels = datum.channels();
 158   const int datum_height = datum.height();
 159   const int datum_width = datum.width();
 160
 161   // Check dimensions.
 162   const int channels = transformed_blob->channels();
 163   const int height = transformed_blob->height();
 164   const int width = transformed_blob->width();
 165   const int num = transformed_blob->num();
 166
 167   CHECK_EQ(channels, datum_channels);
 168   CHECK_LE(height, datum_height);
 169   CHECK_LE(width, datum_width);
 170   CHECK_GE(num, 1);
 171
 172   if (crop_size) {
 173     CHECK_EQ(crop_size, height);
 174     CHECK_EQ(crop_size, width);
 175   } else {
 176     CHECK_EQ(datum_height, height);
 177     CHECK_EQ(datum_width, width);
 178   }
 179
 180   Dtype* transformed_data = transformed_blob->mutable_cpu_data();
 181   Transform(datum, transformed_data);
 182 }
 183
 184 template<typename Dtype>
 185 void DataTransformer<Dtype>::Transform(const vector<Datum> & datum_vector,
 186                                        Blob<Dtype>* transformed_blob) {
 187   const int datum_num = datum_vector.size();
 188   const int num = transformed_blob->num();
 189   const int channels = transformed_blob->channels();
 190   const int height = transformed_blob->height();
 191   const int width = transformed_blob->width();
 192
 193   CHECK_GT(datum_num, 0) << "There is no datum to add";
 194   CHECK_LE(datum_num, num) <<
 195     "The size of datum_vector must be no greater than transformed_blob->num()";
 196   Blob<Dtype> uni_blob(1, channels, height, width);
 197   for (int item_id = 0; item_id < datum_num; ++item_id) {
 198     int offset = transformed_blob->offset(item_id);
 199     uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset);
 200     Transform(datum_vector[item_id], &uni_blob);
 201   }
 202 }
 203
 204 #ifdef USE_OPENCV
 205 template<typename Dtype>
 206 void DataTransformer<Dtype>::Transform(const vector<cv::Mat> & mat_vector,
 207                                        Blob<Dtype>* transformed_blob) {
 208   const int mat_num = mat_vector.size();
 209   const int num = transformed_blob->num();
 210   const int channels = transformed_blob->channels();
 211   const int height = transformed_blob->height();
 212   const int width = transformed_blob->width();
 213
 214   CHECK_GT(mat_num, 0) << "There is no MAT to add";
 215   CHECK_EQ(mat_num, num) <<
 216     "The size of mat_vector must be equals to transformed_blob->num()";
 217   Blob<Dtype> uni_blob(1, channels, height, width);
 218   for (int item_id = 0; item_id < mat_num; ++item_id) {
 219     int offset = transformed_blob->offset(item_id);
 220     uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset);
 221     Transform(mat_vector[item_id], &uni_blob);
 222   }
 223 }
 224
 225 template<typename Dtype>
 226 void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
 227                                        Blob<Dtype>* transformed_blob) {
 228   const int crop_size = param_.crop_size();
 229   const int img_channels = cv_img.channels();
 230   const int img_height = cv_img.rows;
 231   const int img_width = cv_img.cols;
 232
 233   // Check dimensions.
 234   const int channels = transformed_blob->channels();
 235   const int height = transformed_blob->height();
 236   const int width = transformed_blob->width();
 237   const int num = transformed_blob->num();
 238
 239   CHECK_EQ(channels, img_channels);
 240   CHECK_LE(height, img_height);
 241   CHECK_LE(width, img_width);
 242   CHECK_GE(num, 1);
 243
 244   CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte";
 245
 246   const Dtype scale = param_.scale();
 247   const bool do_mirror = param_.mirror() && Rand(2);
 248   const bool has_mean_file = param_.has_mean_file();
 249   const bool has_mean_values = mean_values_.size() > 0;
 250
 251   CHECK_GT(img_channels, 0);
 252   CHECK_GE(img_height, crop_size);
 253   CHECK_GE(img_width, crop_size);
 254
 255   Dtype* mean = NULL;
 256   if (has_mean_file) {
 257     CHECK_EQ(img_channels, data_mean_.channels());
 258     CHECK_EQ(img_height, data_mean_.height());
 259     CHECK_EQ(img_width, data_mean_.width());
 260     mean = data_mean_.mutable_cpu_data();
 261   }
 262   if (has_mean_values) {
 263     CHECK(mean_values_.size() == 1 || mean_values_.size() == img_channels) <<
 264      "Specify either 1 mean_value or as many as channels: " << img_channels;
 265     if (img_channels > 1 && mean_values_.size() == 1) {
 266       // Replicate the mean_value for simplicity
 267       for (int c = 1; c < img_channels; ++c) {
 268         mean_values_.push_back(mean_values_[0]);
 269       }
 270     }
 271   }
 272
 273   int h_off = 0;
 274   int w_off = 0;
 275   cv::Mat cv_cropped_img = cv_img;
 276   if (crop_size) {
 277     CHECK_EQ(crop_size, height);
 278     CHECK_EQ(crop_size, width);
 279     // We only do random crop when we do training.
 280     if (phase_ == TRAIN) {
 281       h_off = Rand(img_height - crop_size + 1);
 282       w_off = Rand(img_width - crop_size + 1);
 283     } else {
 284       h_off = (img_height - crop_size) / 2;
 285       w_off = (img_width - crop_size) / 2;
 286     }
 287     cv::Rect roi(w_off, h_off, crop_size, crop_size);
 288     cv_cropped_img = cv_img(roi);
 289   } else {
 290     CHECK_EQ(img_height, height);
 291     CHECK_EQ(img_width, width);
 292   }
 293
 294   CHECK(cv_cropped_img.data);
 295
 296   Dtype* transformed_data = transformed_blob->mutable_cpu_data();
 297   int top_index;
 298   for (int h = 0; h < height; ++h) {
 299     const uchar* ptr = cv_cropped_img.ptr<uchar>(h);
 300     int img_index = 0;
 301     for (int w = 0; w < width; ++w) {
 302       for (int c = 0; c < img_channels; ++c) {
 303         if (do_mirror) {
 304           top_index = (c * height + h) * width + (width - 1 - w);
 305         } else {
 306           top_index = (c * height + h) * width + w;
 307         }
 308         // int top_index = (c * height + h) * width + w;
 309         Dtype pixel = static_cast<Dtype>(ptr[img_index++]);
 310         if (has_mean_file) {
 311           int mean_index = (c * img_height + h_off + h) * img_width + w_off + w;
 312           transformed_data[top_index] =
 313             (pixel - mean[mean_index]) * scale;
 314         } else {
 315           if (has_mean_values) {
 316             transformed_data[top_index] =
 317               (pixel - mean_values_[c]) * scale;
 318           } else {
 319             transformed_data[top_index] = pixel * scale;
 320           }
 321         }
 322       }
 323     }
 324   }
 325 }
 326 #endif  // USE_OPENCV
 327
 328 template<typename Dtype>
 329 void DataTransformer<Dtype>::Transform(Blob<Dtype>* input_blob,
 330                                        Blob<Dtype>* transformed_blob) {
 331   const int crop_size = param_.crop_size();
 332   const int input_num = input_blob->num();
 333   const int input_channels = input_blob->channels();
 334   const int input_height = input_blob->height();
 335   const int input_width = input_blob->width();
 336
 337   if (transformed_blob->count() == 0) {
 338     // Initialize transformed_blob with the right shape.
 339     if (crop_size) {
 340       transformed_blob->Reshape(input_num, input_channels,
 341                                 crop_size, crop_size);
 342     } else {
 343       transformed_blob->Reshape(input_num, input_channels,
 344                                 input_height, input_width);
 345     }
 346   }
 347
 348   const int num = transformed_blob->num();
 349   const int channels = transformed_blob->channels();
 350   const int height = transformed_blob->height();
 351   const int width = transformed_blob->width();
 352   const int size = transformed_blob->count();
 353
 354   CHECK_LE(input_num, num);
 355   CHECK_EQ(input_channels, channels);
 356   CHECK_GE(input_height, height);
 357   CHECK_GE(input_width, width);
 358
 359
 360   const Dtype scale = param_.scale();
 361   const bool do_mirror = param_.mirror() && Rand(2);
 362   const bool has_mean_file = param_.has_mean_file();
 363   const bool has_mean_values = mean_values_.size() > 0;
 364
 365   int h_off = 0;
 366   int w_off = 0;
 367   if (crop_size) {
 368     CHECK_EQ(crop_size, height);
 369     CHECK_EQ(crop_size, width);
 370     // We only do random crop when we do training.
 371     if (phase_ == TRAIN) {
 372       h_off = Rand(input_height - crop_size + 1);
 373       w_off = Rand(input_width - crop_size + 1);
 374     } else {
 375       h_off = (input_height - crop_size) / 2;
 376       w_off = (input_width - crop_size) / 2;
 377     }
 378   } else {
 379     CHECK_EQ(input_height, height);
 380     CHECK_EQ(input_width, width);
 381   }
 382
 383   Dtype* input_data = input_blob->mutable_cpu_data();
 384   if (has_mean_file) {
 385     CHECK_EQ(input_channels, data_mean_.channels());
 386     CHECK_EQ(input_height, data_mean_.height());
 387     CHECK_EQ(input_width, data_mean_.width());
 388     for (int n = 0; n < input_num; ++n) {
 389       int offset = input_blob->offset(n);
 390       caffe_sub(data_mean_.count(), input_data + offset,
 391             data_mean_.cpu_data(), input_data + offset);
 392     }
 393   }
 394
 395   if (has_mean_values) {
 396     CHECK(mean_values_.size() == 1 || mean_values_.size() == input_channels) <<
 397      "Specify either 1 mean_value or as many as channels: " << input_channels;
 398     if (mean_values_.size() == 1) {
 399       caffe_add_scalar(input_blob->count(), -(mean_values_[0]), input_data);
 400     } else {
 401       for (int n = 0; n < input_num; ++n) {
 402         for (int c = 0; c < input_channels; ++c) {
 403           int offset = input_blob->offset(n, c);
 404           caffe_add_scalar(input_height * input_width, -(mean_values_[c]),
 405             input_data + offset);
 406         }
 407       }
 408     }
 409   }
 410
 411   Dtype* transformed_data = transformed_blob->mutable_cpu_data();
 412
 413   for (int n = 0; n < input_num; ++n) {
 414     int top_index_n = n * channels;
 415     int data_index_n = n * channels;
 416     for (int c = 0; c < channels; ++c) {
 417       int top_index_c = (top_index_n + c) * height;
 418       int data_index_c = (data_index_n + c) * input_height + h_off;
 419       for (int h = 0; h < height; ++h) {
 420         int top_index_h = (top_index_c + h) * width;
 421         int data_index_h = (data_index_c + h) * input_width + w_off;
 422         if (do_mirror) {
 423           int top_index_w = top_index_h + width - 1;
 424           for (int w = 0; w < width; ++w) {
 425             transformed_data[top_index_w-w] = input_data[data_index_h + w];
 426           }
 427         } else {
 428           for (int w = 0; w < width; ++w) {
 429             transformed_data[top_index_h + w] = input_data[data_index_h + w];
 430           }
 431         }
 432       }
 433     }
 434   }
 435   if (scale != Dtype(1)) {
 436     DLOG(INFO) << "Scale: " << scale;
 437     caffe_scal(size, scale, transformed_data);
 438   }
 439 }
 440
 441 template<typename Dtype>
 442 vector<int> DataTransformer<Dtype>::InferBlobShape(const Datum& datum) {
 443   if (datum.encoded()) {
 444 #ifdef USE_OPENCV
 445     CHECK(!(param_.force_color() && param_.force_gray()))
 446         << "cannot set both force_color and force_gray";
 447     cv::Mat cv_img;
 448     if (param_.force_color() || param_.force_gray()) {
 449     // If force_color then decode in color otherwise decode in gray.
 450       cv_img = DecodeDatumToCVMat(datum, param_.force_color());
 451     } else {
 452       cv_img = DecodeDatumToCVMatNative(datum);
 453     }
 454     // InferBlobShape using the cv::image.
 455     return InferBlobShape(cv_img);
 456 #else
 457     LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV.";
 458 #endif  // USE_OPENCV
 459   }
 460   const int crop_size = param_.crop_size();
 461   const int datum_channels = datum.channels();
 462   const int datum_height = datum.height();
 463   const int datum_width = datum.width();
 464   // Check dimensions.
 465   CHECK_GT(datum_channels, 0);
 466   CHECK_GE(datum_height, crop_size);
 467   CHECK_GE(datum_width, crop_size);
 468   // Build BlobShape.
 469   vector<int> shape(4);
 470   shape[0] = 1;
 471   shape[1] = datum_channels;
 472   shape[2] = (crop_size)? crop_size: datum_height;
 473   shape[3] = (crop_size)? crop_size: datum_width;
 474   return shape;
 475 }
 476
 477 template<typename Dtype>
 478 vector<int> DataTransformer<Dtype>::InferBlobShape(
 479     const vector<Datum> & datum_vector) {
 480   const int num = datum_vector.size();
 481   CHECK_GT(num, 0) << "There is no datum to in the vector";
 482   // Use first datum in the vector to InferBlobShape.
 483   vector<int> shape = InferBlobShape(datum_vector[0]);
 484   // Adjust num to the size of the vector.
 485   shape[0] = num;
 486   return shape;
 487 }
 488
 489 #ifdef USE_OPENCV
 490 template<typename Dtype>
 491 vector<int> DataTransformer<Dtype>::InferBlobShape(const cv::Mat& cv_img) {
 492   const int crop_size = param_.crop_size();
 493   const int img_channels = cv_img.channels();
 494   const int img_height = cv_img.rows;
 495   const int img_width = cv_img.cols;
 496   // Check dimensions.
 497   CHECK_GT(img_channels, 0);
 498   CHECK_GE(img_height, crop_size);
 499   CHECK_GE(img_width, crop_size);
 500   // Build BlobShape.
 501   vector<int> shape(4);
 502   shape[0] = 1;
 503   shape[1] = img_channels;
 504   shape[2] = (crop_size)? crop_size: img_height;
 505   shape[3] = (crop_size)? crop_size: img_width;
 506   return shape;
 507 }
 508
 509 template<typename Dtype>
 510 vector<int> DataTransformer<Dtype>::InferBlobShape(
 511     const vector<cv::Mat> & mat_vector) {
 512   const int num = mat_vector.size();
 513   CHECK_GT(num, 0) << "There is no cv_img to in the vector";
 514   // Use first cv_img in the vector to InferBlobShape.
 515   vector<int> shape = InferBlobShape(mat_vector[0]);
 516   // Adjust num to the size of the vector.
 517   shape[0] = num;
 518   return shape;
 519 }
 520 #endif  // USE_OPENCV
 521
 522 template <typename Dtype>
 523 void DataTransformer<Dtype>::InitRand() {
 524   const bool needs_rand = param_.mirror() ||
 525       (phase_ == TRAIN && param_.crop_size());
 526   if (needs_rand) {
 527     const unsigned int rng_seed = caffe_rng_rand();
 528     rng_.reset(new Caffe::RNG(rng_seed));
 529   } else {
 530     rng_.reset();
 531   }
 532 }
 533
 534 template <typename Dtype>
 535 int DataTransformer<Dtype>::Rand(int n) {
 536   CHECK(rng_);
 537   CHECK_GT(n, 0);
 538   caffe::rng_t* rng =
 539       static_cast<caffe::rng_t*>(rng_->generator());
 540   return ((*rng)() % n);
 541 }
 542
 543 INSTANTIATE_CLASS(DataTransformer);
 544
 545 }  // namespace caffe