Change caffe time to do forward/backward and accumulate time per layer
authorSergio <sguada@gmail.com>
Wed, 15 Oct 2014 22:36:16 +0000 (15:36 -0700)
committerSergio <sguada@gmail.com>
Thu, 16 Oct 2014 00:03:08 +0000 (17:03 -0700)
Makefile
src/caffe/layers/data_layer.cpp
src/caffe/layers/image_data_layer.cpp
src/caffe/layers/window_data_layer.cpp
src/caffe/net.cpp
tools/caffe.cpp

index 393fe18..26d5964 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -281,12 +281,6 @@ ifeq ($(USE_CUDNN), 1)
        COMMON_FLAGS += -DUSE_CUDNN
 endif
 
-TIMING ?= 0
-# Timing Flag
-ifneq ($(TIMING), 0)
-       COMMON_FLAGS += -DTIMING
-endif
-
 # CPU-only configuration
 ifeq ($(CPU_ONLY), 1)
        OBJS := $(PROTO_OBJS) $(CXX_OBJS)
index 95604e5..7ddd466 100644 (file)
@@ -8,9 +8,7 @@
 #include "caffe/dataset_factory.hpp"
 #include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
-#ifdef TIMING
 #include "caffe/util/benchmark.hpp"
-#endif
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -84,13 +82,11 @@ void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
 // This function is used to create a thread that prefetches the data.
 template <typename Dtype>
 void DataLayer<Dtype>::InternalThreadEntry() {
-  #ifdef TIMING
   Timer batch_timer;
   batch_timer.Start();
   float read_time = 0;
   float trans_time = 0;
   Timer timer;
-  #endif
   CHECK(this->prefetch_data_.count());
   CHECK(this->transformed_data_.count());
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
@@ -105,14 +101,13 @@ void DataLayer<Dtype>::InternalThreadEntry() {
     // get a blob
     CHECK(iter_ != dataset_->end());
     const Datum& datum = iter_->value;
+
     cv::Mat cv_img;
     if (datum.encoded()) {
        cv_img = DecodeDatumToCVMat(datum);
     }
-    #ifdef TIMING
     read_time += timer.MilliSeconds();
     timer.Start();
-    #endif
 
     // Apply data transformations (mirror, scale, crop...)
     int offset = this->prefetch_data_.offset(item_id);
@@ -125,20 +120,17 @@ void DataLayer<Dtype>::InternalThreadEntry() {
     if (this->output_labels_) {
       top_label[item_id] = datum.label();
     }
-    #ifdef TIMING
     trans_time += timer.MilliSeconds();
-    #endif
     // go to the next iter
     ++iter_;
     if (iter_ == dataset_->end()) {
       iter_ = dataset_->begin();
     }
   }
-  #ifdef TIMING
-  LOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  LOG(INFO) << "Read time: " << read_time << "ms.";
-  LOG(INFO) << "Transform time: " << trans_time << "ms.";
-  #endif
+  batch_timer.Stop();
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  DLOG(INFO) << "Read time: " << read_time << "ms.";
+  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
 }
 
 INSTANTIATE_CLASS(DataLayer);
index 0abcd88..e0778e5 100644 (file)
@@ -6,9 +6,7 @@
 
 #include "caffe/data_layers.hpp"
 #include "caffe/layer.hpp"
-#ifdef TIMING
 #include "caffe/util/benchmark.hpp"
-#endif
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -95,13 +93,11 @@ void ImageDataLayer<Dtype>::ShuffleImages() {
 // This function is used to create a thread that prefetches the data.
 template <typename Dtype>
 void ImageDataLayer<Dtype>::InternalThreadEntry() {
-  #ifdef TIMING
   Timer batch_timer;
   batch_timer.Start();
   float read_time = 0;
   float trans_time = 0;
   Timer timer;
-  #endif
   CHECK(this->prefetch_data_.count());
   CHECK(this->transformed_data_.count());
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
@@ -117,26 +113,20 @@ void ImageDataLayer<Dtype>::InternalThreadEntry() {
   const int lines_size = lines_.size();
   for (int item_id = 0; item_id < batch_size; ++item_id) {
     // get a blob
-    #ifdef TIMING
     timer.Start();
-    #endif
     CHECK_GT(lines_size, lines_id_);
     cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                     new_height, new_width, is_color);
     if (!cv_img.data) {
       continue;
     }
-    #ifdef TIMING
     read_time += timer.MilliSeconds();
     timer.Start();
-    #endif
     // Apply transformations (mirror, crop...) to the image
     int offset = this->prefetch_data_.offset(item_id);
     this->transformed_data_.set_cpu_data(top_data + offset);
     this->data_transformer_.Transform(cv_img, &(this->transformed_data_));
-    #ifdef TIMING
     trans_time += timer.MilliSeconds();
-    #endif
 
     top_label[item_id] = lines_[lines_id_].second;
     // go to the next iter
@@ -150,11 +140,10 @@ void ImageDataLayer<Dtype>::InternalThreadEntry() {
       }
     }
   }
-  #ifdef TIMING
-  LOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  LOG(INFO) << "Read time: " << read_time << "ms.";
-  LOG(INFO) << "Transform time: " << trans_time << "ms.";
-  #endif
+  batch_timer.Stop();
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  DLOG(INFO) << "Read time: " << read_time << "ms.";
+  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
 }
 
 INSTANTIATE_CLASS(ImageDataLayer);
index 8f75557..14e8caa 100644 (file)
@@ -13,9 +13,7 @@
 #include "caffe/common.hpp"
 #include "caffe/data_layers.hpp"
 #include "caffe/layer.hpp"
-#ifdef TIMING
 #include "caffe/util/benchmark.hpp"
-#endif
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
 #include "caffe/util/rng.hpp"
@@ -226,13 +224,11 @@ template <typename Dtype>
 void WindowDataLayer<Dtype>::InternalThreadEntry() {
   // At each iteration, sample N windows where N*p are foreground (object)
   // windows and N*(1-p) are background (non-object) windows
-  #ifdef TIMING
   Timer batch_timer;
   batch_timer.Start();
   float read_time = 0;
   float trans_time = 0;
   Timer timer;
-  #endif
   Dtype* top_data = this->prefetch_data_.mutable_cpu_data();
   Dtype* top_label = this->prefetch_label_.mutable_cpu_data();
   const Dtype scale = this->layer_param_.window_data_param().scale();
@@ -269,9 +265,7 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
   for (int is_fg = 0; is_fg < 2; ++is_fg) {
     for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) {
       // sample a window
-      #ifdef TIMING
       timer.Start();
-      #endif
       const unsigned int rand_index = PrefetchRand();
       vector<float> window = (is_fg) ?
           fg_windows_[rand_index % fg_windows_.size()] :
@@ -295,10 +289,8 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
           return;
         }
       }
-      #ifdef TIMING
       read_time += timer.MilliSeconds();
       timer.Start();
-      #endif
       const int channels = cv_img.channels();
 
       // crop window out of image and warp it
@@ -424,9 +416,7 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
           }
         }
       }
-      #ifdef TIMING
       trans_time += timer.MilliSeconds();
-      #endif
       // get window label
       top_label[item_id] = window[WindowDataLayer<Dtype>::LABEL];
 
@@ -466,12 +456,10 @@ void WindowDataLayer<Dtype>::InternalThreadEntry() {
       item_id++;
     }
   }
-  #ifdef TIMING
   batch_timer.Stop();
-  LOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
-  LOG(INFO) << "Read time: " << read_time << "ms.";
-  LOG(INFO) << "Transform time: " << trans_time << "ms.";
-  #endif
+  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms.";
+  DLOG(INFO) << "Read time: " << read_time << "ms.";
+  DLOG(INFO) << "Transform time: " << trans_time << "ms.";
 }
 
 INSTANTIATE_CLASS(WindowDataLayer);
index c0e5692..21ab15f 100644 (file)
@@ -9,9 +9,6 @@
 #include "caffe/layer.hpp"
 #include "caffe/net.hpp"
 #include "caffe/proto/caffe.pb.h"
-#ifdef TIMING
-#include "caffe/util/benchmark.hpp"
-#endif
 #include "caffe/util/insert_splits.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/math_functions.hpp"
@@ -502,10 +499,6 @@ void Net<Dtype>::GetLearningRateAndWeightDecay() {
 
 template <typename Dtype>
 Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
-  #ifdef TIMING
-  Timer timer;
-  timer.Start();
-  #endif
   CHECK_GE(start, 0);
   CHECK_LT(end, layers_.size());
   Dtype loss = 0;
@@ -516,9 +509,6 @@ Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
     loss += layer_loss;
     if (debug_info_) { ForwardDebugInfo(i); }
   }
-  #ifdef TIMING
-  LOG(INFO) << "Forward time: " << timer.MilliSeconds() << "ms.";
-  #endif
   return loss;
 }
 
@@ -575,10 +565,6 @@ string Net<Dtype>::Forward(const string& input_blob_protos, Dtype* loss) {
 
 template <typename Dtype>
 void Net<Dtype>::BackwardFromTo(int start, int end) {
-  #ifdef TIMING
-  Timer timer;
-  timer.Start();
-  #endif
   CHECK_GE(end, 0);
   CHECK_LT(start, layers_.size());
   for (int i = start; i >= end; --i) {
@@ -588,9 +574,6 @@ void Net<Dtype>::BackwardFromTo(int start, int end) {
       if (debug_info_) { BackwardDebugInfo(i); }
     }
   }
-  #ifdef TIMING
-  LOG(INFO) << "Backward time: " << timer.MilliSeconds() << "ms.";
-  #endif
 }
 
 template <typename Dtype>
index bfcd9f1..1c842a0 100644 (file)
@@ -229,35 +229,46 @@ int time() {
   Timer total_timer;
   total_timer.Start();
   Timer forward_timer;
-  forward_timer.Start();
+  Timer backward_timer;
   Timer timer;
-  for (int i = 0; i < layers.size(); ++i) {
-    const caffe::string& layername = layers[i]->layer_param().name();
-    timer.Start();
-    for (int j = 0; j < FLAGS_iterations; ++j) {
+  std::vector<float> forward_time_per_layer(layers.size(), 0.0);
+  std::vector<float> backward_time_per_layer(layers.size(), 0.0);
+  float forward_time = 0.0;
+  float backward_time = 0.0;
+  for (int j = 0; j < FLAGS_iterations; ++j) {
+    Timer iter_timer;
+    iter_timer.Start();
+    forward_timer.Start();
+    for (int i = 0; i < layers.size(); ++i) {
+      timer.Start();
       // Although Reshape should be essentially free, we include it here
       // so that we will notice Reshape performance bugs.
       layers[i]->Reshape(bottom_vecs[i], top_vecs[i]);
       layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
+      forward_time_per_layer[i] += timer.MilliSeconds();
     }
-    LOG(INFO) << layername << "\tforward: " << timer.MilliSeconds() <<
-        " milliseconds.";
-  }
-  LOG(INFO) << "Forward pass: " << forward_timer.MilliSeconds() <<
-      " milliseconds.";
-  Timer backward_timer;
-  backward_timer.Start();
-  for (int i = layers.size() - 1; i >= 0; --i) {
-    const caffe::string& layername = layers[i]->layer_param().name();
-    timer.Start();
-    for (int j = 0; j < FLAGS_iterations; ++j) {
+    forward_time += forward_timer.MilliSeconds();
+    backward_timer.Start();
+    for (int i = layers.size() - 1; i >= 0; --i) {
+      timer.Start();
       layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
                           bottom_vecs[i]);
+      backward_time_per_layer[i] += timer.MilliSeconds();
     }
-    LOG(INFO) << layername << "\tbackward: "
-        << timer.MilliSeconds() << " milliseconds.";
+    backward_time += backward_timer.MilliSeconds();
+    LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: "
+      << iter_timer.MilliSeconds() << " milliseconds.";
   }
-  LOG(INFO) << "Backward pass: " << backward_timer.MilliSeconds() <<
+  for (int i = 0; i < layers.size(); ++i) {
+    const caffe::string& layername = layers[i]->layer_param().name();
+    LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
+      "\tforward: " << forward_time_per_layer[i] << " milliseconds.";
+    LOG(INFO) << std::setfill(' ') << std::setw(10) << layername  <<
+      "\tbackward: " << backward_time_per_layer[i] << " milliseconds.";
+  }
+  LOG(INFO) << "Forward pass: " << forward_time <<
+      " milliseconds.";
+  LOG(INFO) << "Backward pass: " << backward_time <<
       " milliseconds.";
   LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() <<
       " milliseconds.";