From 8867a54599f1d35b36bb29ee6fea94d6f635d675 Mon Sep 17 00:00:00 2001 From: Sergio Date: Sun, 12 Oct 2014 23:16:59 -0700 Subject: [PATCH] Added CPUTimer Make timing more precise using double and microseconds --- include/caffe/util/benchmark.hpp | 20 ++++++++++---- src/caffe/layers/data_layer.cpp | 18 ++++++------- src/caffe/layers/image_data_layer.cpp | 18 ++++++------- src/caffe/layers/window_data_layer.cpp | 18 ++++++------- src/caffe/util/benchmark.cpp | 49 +++++++++++++++++++++++++++++++++- tools/caffe.cpp | 39 +++++++++++++++------------ tools/compute_image_mean.cpp | 2 ++ 7 files changed, 114 insertions(+), 50 deletions(-) diff --git a/include/caffe/util/benchmark.hpp b/include/caffe/util/benchmark.hpp index 6c01ac4..d635827 100644 --- a/include/caffe/util/benchmark.hpp +++ b/include/caffe/util/benchmark.hpp @@ -11,11 +11,11 @@ class Timer { public: Timer(); virtual ~Timer(); - void Start(); - void Stop(); - float MilliSeconds(); - float MicroSeconds(); - float Seconds(); + virtual void Start(); + virtual void Stop(); + virtual float MilliSeconds(); + virtual float MicroSeconds(); + virtual float Seconds(); inline bool initted() { return initted_; } inline bool running() { return running_; } @@ -37,6 +37,16 @@ class Timer { float elapsed_microseconds_; }; +class CPUTimer : public Timer { + public: + explicit CPUTimer(); + virtual ~CPUTimer() {} + virtual void Start(); + virtual void Stop(); + virtual float MilliSeconds(); + virtual float MicroSeconds(); +}; + } // namespace caffe #endif // CAFFE_UTIL_BENCHMARK_H_ diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 05f2e29..5d6f058 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -82,11 +82,11 @@ void DataLayer::DataLayerSetUp(const vector*>& bottom, // This function is used to create a thread that prefetches the data. template void DataLayer::InternalThreadEntry() { - Timer batch_timer; + CPUTimer batch_timer; batch_timer.Start(); - float read_time = 0; - float trans_time = 0; - Timer timer; + double read_time = 0; + double trans_time = 0; + CPUTimer timer; CHECK(this->prefetch_data_.count()); CHECK(this->transformed_data_.count()); Dtype* top_data = this->prefetch_data_.mutable_cpu_data(); @@ -106,7 +106,7 @@ void DataLayer::InternalThreadEntry() { if (datum.encoded()) { cv_img = DecodeDatumToCVMat(datum); } - read_time += timer.MilliSeconds(); + read_time += timer.MicroSeconds(); timer.Start(); // Apply data transformations (mirror, scale, crop...) @@ -120,7 +120,7 @@ void DataLayer::InternalThreadEntry() { if (this->output_labels_) { top_label[item_id] = datum.label(); } - trans_time += timer.MilliSeconds(); + trans_time += timer.MicroSeconds(); // go to the next iter ++iter_; if (iter_ == dataset_->end()) { @@ -128,9 +128,9 @@ void DataLayer::InternalThreadEntry() { } } batch_timer.Stop(); - DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms."; - DLOG(INFO) << "Read time: " << read_time << "ms."; - DLOG(INFO) << "Transform time: " << trans_time << "ms."; + DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; + DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; + DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; } INSTANTIATE_CLASS(DataLayer); diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index e0778e5..50997a2 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -93,11 +93,11 @@ void ImageDataLayer::ShuffleImages() { // This function is used to create a thread that prefetches the data. template void ImageDataLayer::InternalThreadEntry() { - Timer batch_timer; + CPUTimer batch_timer; batch_timer.Start(); - float read_time = 0; - float trans_time = 0; - Timer timer; + double read_time = 0; + double trans_time = 0; + CPUTimer timer; CHECK(this->prefetch_data_.count()); CHECK(this->transformed_data_.count()); Dtype* top_data = this->prefetch_data_.mutable_cpu_data(); @@ -120,13 +120,13 @@ void ImageDataLayer::InternalThreadEntry() { if (!cv_img.data) { continue; } - read_time += timer.MilliSeconds(); + read_time += timer.MicroSeconds(); timer.Start(); // Apply transformations (mirror, crop...) to the image int offset = this->prefetch_data_.offset(item_id); this->transformed_data_.set_cpu_data(top_data + offset); this->data_transformer_.Transform(cv_img, &(this->transformed_data_)); - trans_time += timer.MilliSeconds(); + trans_time += timer.MicroSeconds(); top_label[item_id] = lines_[lines_id_].second; // go to the next iter @@ -141,9 +141,9 @@ void ImageDataLayer::InternalThreadEntry() { } } batch_timer.Stop(); - DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms."; - DLOG(INFO) << "Read time: " << read_time << "ms."; - DLOG(INFO) << "Transform time: " << trans_time << "ms."; + DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; + DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; + DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; } INSTANTIATE_CLASS(ImageDataLayer); diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index 14e8caa..6287b38 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -224,11 +224,11 @@ template void WindowDataLayer::InternalThreadEntry() { // At each iteration, sample N windows where N*p are foreground (object) // windows and N*(1-p) are background (non-object) windows - Timer batch_timer; + CPUTimer batch_timer; batch_timer.Start(); - float read_time = 0; - float trans_time = 0; - Timer timer; + double read_time = 0; + double trans_time = 0; + CPUTimer timer; Dtype* top_data = this->prefetch_data_.mutable_cpu_data(); Dtype* top_label = this->prefetch_label_.mutable_cpu_data(); const Dtype scale = this->layer_param_.window_data_param().scale(); @@ -289,7 +289,7 @@ void WindowDataLayer::InternalThreadEntry() { return; } } - read_time += timer.MilliSeconds(); + read_time += timer.MicroSeconds(); timer.Start(); const int channels = cv_img.channels(); @@ -416,7 +416,7 @@ void WindowDataLayer::InternalThreadEntry() { } } } - trans_time += timer.MilliSeconds(); + trans_time += timer.MicroSeconds(); // get window label top_label[item_id] = window[WindowDataLayer::LABEL]; @@ -457,9 +457,9 @@ void WindowDataLayer::InternalThreadEntry() { } } batch_timer.Stop(); - DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << "ms."; - DLOG(INFO) << "Read time: " << read_time << "ms."; - DLOG(INFO) << "Transform time: " << trans_time << "ms."; + DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; + DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; + DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; } INSTANTIATE_CLASS(WindowDataLayer); diff --git a/src/caffe/util/benchmark.cpp b/src/caffe/util/benchmark.cpp index 2edc9a5..1d269c3 100644 --- a/src/caffe/util/benchmark.cpp +++ b/src/caffe/util/benchmark.cpp @@ -69,7 +69,7 @@ float Timer::MicroSeconds() { CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_, stop_gpu_)); // Cuda only measure milliseconds - elapsed_microseconds_ = elapsed_milliseconds_ / 1000; + elapsed_microseconds_ = elapsed_milliseconds_ * 1000; #else NO_GPU; #endif @@ -118,4 +118,51 @@ void Timer::Init() { } } +CPUTimer::CPUTimer() { + this->initted_ = true; + this->running_ = false; + this->has_run_at_least_once_ = false; +} + +void CPUTimer::Start() { + if (!running()) { + this->start_cpu_ = boost::posix_time::microsec_clock::local_time(); + this->running_ = true; + this->has_run_at_least_once_ = true; + } +} + +void CPUTimer::Stop() { + if (running()) { + this->stop_cpu_ = boost::posix_time::microsec_clock::local_time(); + this->running_ = false; + } +} + +float CPUTimer::MilliSeconds() { + if (!has_run_at_least_once()) { + LOG(WARNING) << "Timer has never been run before reading time."; + return 0; + } + if (running()) { + Stop(); + } + this->elapsed_milliseconds_ = (this->stop_cpu_ - + this->start_cpu_).total_milliseconds(); + return this->elapsed_milliseconds_; +} + +float CPUTimer::MicroSeconds() { + if (!has_run_at_least_once()) { + LOG(WARNING) << "Timer has never been run before reading time."; + return 0; + } + if (running()) { + Stop(); + } + this->elapsed_microseconds_ = (this->stop_cpu_ - + this->start_cpu_).total_microseconds(); + return this->elapsed_microseconds_; +} + } // namespace caffe diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 1c842a0..9f9d975 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -231,10 +231,10 @@ int time() { Timer forward_timer; Timer backward_timer; Timer timer; - std::vector forward_time_per_layer(layers.size(), 0.0); - std::vector backward_time_per_layer(layers.size(), 0.0); - float forward_time = 0.0; - float backward_time = 0.0; + std::vector forward_time_per_layer(layers.size(), 0.0); + std::vector backward_time_per_layer(layers.size(), 0.0); + double forward_time = 0.0; + double backward_time = 0.0; for (int j = 0; j < FLAGS_iterations; ++j) { Timer iter_timer; iter_timer.Start(); @@ -245,33 +245,38 @@ int time() { // so that we will notice Reshape performance bugs. layers[i]->Reshape(bottom_vecs[i], top_vecs[i]); layers[i]->Forward(bottom_vecs[i], top_vecs[i]); - forward_time_per_layer[i] += timer.MilliSeconds(); + forward_time_per_layer[i] += timer.MicroSeconds(); } - forward_time += forward_timer.MilliSeconds(); + forward_time += forward_timer.MicroSeconds(); backward_timer.Start(); for (int i = layers.size() - 1; i >= 0; --i) { timer.Start(); layers[i]->Backward(top_vecs[i], bottom_need_backward[i], bottom_vecs[i]); - backward_time_per_layer[i] += timer.MilliSeconds(); + backward_time_per_layer[i] += timer.MicroSeconds(); } - backward_time += backward_timer.MilliSeconds(); + backward_time += backward_timer.MicroSeconds(); LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: " - << iter_timer.MilliSeconds() << " milliseconds."; + << iter_timer.MilliSeconds() << " ms."; } + LOG(INFO) << "Average time per layer: "; for (int i = 0; i < layers.size(); ++i) { const caffe::string& layername = layers[i]->layer_param().name(); LOG(INFO) << std::setfill(' ') << std::setw(10) << layername << - "\tforward: " << forward_time_per_layer[i] << " milliseconds."; + "\tforward: " << forward_time_per_layer[i] / 1000 / + FLAGS_iterations << " ms."; LOG(INFO) << std::setfill(' ') << std::setw(10) << layername << - "\tbackward: " << backward_time_per_layer[i] << " milliseconds."; + "\tbackward: " << backward_time_per_layer[i] / 1000 / + FLAGS_iterations << " ms."; } - LOG(INFO) << "Forward pass: " << forward_time << - " milliseconds."; - LOG(INFO) << "Backward pass: " << backward_time << - " milliseconds."; - LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << - " milliseconds."; + total_timer.Stop(); + LOG(INFO) << "Average Forward pass: " << forward_time / 1000 / + FLAGS_iterations << " ms."; + LOG(INFO) << "Average Backward pass: " << backward_time / 1000 / + FLAGS_iterations << " ms."; + LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() / + FLAGS_iterations << " ms."; + LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms."; LOG(INFO) << "*** Benchmark ends ***"; return 0; } diff --git a/tools/compute_image_mean.cpp b/tools/compute_image_mean.cpp index 4c2c658..358f57e 100644 --- a/tools/compute_image_mean.cpp +++ b/tools/compute_image_mean.cpp @@ -4,6 +4,8 @@ #include #include +#include +#include #include "caffe/dataset_factory.hpp" #include "caffe/proto/caffe.pb.h" -- 2.7.4