#include "caffe/blob.hpp"
#include "caffe/common.hpp"
+#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"
-#include "caffe/util/device_alternate.hpp"
+#include "caffe/util/math_functions.hpp"
+#ifdef USE_PROFILING
+#include <sys/time.h>
+#define NANO_SEC_CONV 1000000
+extern unsigned int acl_log_flags;
+#endif //USE_PROFILING
+/**
+ Forward declare boost::thread instead of including boost/thread.hpp
+ to avoid boost/NVCC issues (#1009, #1010) on OSX.
+ */
+namespace boost { class mutex; }
namespace caffe {
+bool AclEnableSchedule(int enable=1);
+#ifdef USE_PROFILING
+// Scoped profiling timer. When (acl_log_flags & mask) is set, measures the
+// wall-clock time between construction and destruction (or between
+// log_time(true) and log_time(false)) and prints "<label> <seconds>".
+// NOTE(review): despite its name, NANO_SEC_CONV (1e6) is microseconds-per-
+// second; the printed value is in seconds.
+class logtime_util
+{
+ public:
+  // mask_:        bit(s) tested against the global acl_log_flags to enable
+  //               this timer.
+  // information_: label printed with the measurement (truncated to 255 chars).
+  logtime_util(int mask_, const char* information_){
+    mask = mask_;
+    if(acl_log_flags & mask){
+      strncpy(information, information_, 255);
+      // strncpy does not NUL-terminate when strlen(information_) >= 255;
+      // terminate explicitly so printf("%s", ...) below is always safe.
+      information[255] = '\0';
+      gettimeofday(&tv[0], NULL);
+    }
+  }
+  ~logtime_util(){
+    if(acl_log_flags & mask){
+      print_elapsed();
+    }
+  }
+  // log_time(true) restarts the timer; log_time(false) stops it and prints
+  // the elapsed time since the last start.
+  void log_time(bool start)
+  {
+    if(acl_log_flags & mask){
+      if (start){
+        gettimeofday(&tv[0], NULL);
+      }
+      else{
+        print_elapsed();
+      }
+    }
+  }
+private:
+  // Take the end timestamp and print "<label> <elapsed seconds>".
+  // Shared by the destructor and log_time(false).
+  void print_elapsed(){
+    long time[2];
+    gettimeofday(&tv[1], NULL);
+    time[0] = tv[0].tv_sec * NANO_SEC_CONV + tv[0].tv_usec;
+    time[1] = tv[1].tv_sec * NANO_SEC_CONV + tv[1].tv_usec;
+    printf("%s %.6lf\n", information, (((double)time[1] - time[0]) / NANO_SEC_CONV));
+  }
+  struct timeval tv[2];   // tv[0] = start timestamp, tv[1] = end timestamp
+  int mask;               // enable bit(s), tested against acl_log_flags
+  char information[256];  // NUL-terminated label for the printed line
+};
+
+#ifdef LAYER_PERF_STAT
+
+// Per-layer timing statistics accumulator (used when LAYER_PERF_STAT is on).
+struct perf_stat {
+  uint64_t total;  // accumulated time over all counted invocations
+  uint32_t start;  // most recent start timestamp -- TODO confirm units at call sites
+  uint32_t end;    // most recent end timestamp -- TODO confirm units at call sites
+  uint32_t used;   // presumably elapsed time of the current invocation; verify against users
+  uint32_t count;  // number of invocations folded into total
+
+  // Zero-initialize every member; the previous initializer list omitted
+  // 'used', leaving it indeterminate.
+  perf_stat(): total(0), start(0), end(0), used(0), count(0) {}
+};
+
+
+#endif
+#endif //USE_PROFILING
/**
* @brief An interface for the units of computation which can be composed into a
* Net.
*
- * Layer&s must implement a Forward function, in which they take their input
- * (bottom) Blob&s (if any) and compute their output Blob&s (if any).
+ * Layer%s must implement a Forward function, in which they take their input
+ * (bottom) Blob%s (if any) and compute their output Blob%s (if any).
* They may also implement a Backward function, in which they compute the error
- * gradients with respect to their input Blob&s, given the error gradients with
- * their output Blob&s.
+ * gradients with respect to their input Blob%s, given the error gradients with
+ * their output Blob%s.
*/
template <typename Dtype>
class Layer {
*/
explicit Layer(const LayerParameter& param)
: layer_param_(param) {
- // The only thing we do is to copy blobs if there are any.
+ // Set phase and copy blobs (if there are any).
+ phase_ = param.phase();
if (layer_param_.blobs_size() > 0) {
blobs_.resize(layer_param_.blobs_size());
for (int i = 0; i < layer_param_.blobs_size(); ++i) {
* Sets up the loss weight multiplier blobs for any non-zero loss weights.
* This method may not be overridden.
*/
- void SetUp(const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
- CheckBlobCounts(bottom, *top);
+ void SetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ CheckBlobCounts(bottom, top);
LayerSetUp(bottom, top);
Reshape(bottom, top);
SetLossWeights(top);
* adjust the top blob sizes.
*/
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {}
+ const vector<Blob<Dtype>*>& top) {}
/**
- * @brief Adjust the shapes of top blobs and internal buffers to accomodate
+ * @brief Adjust the shapes of top blobs and internal buffers to accommodate
* the shapes of the bottom blobs.
*
* @param bottom the input blobs, with the requested input shapes
* This method should reshape top blobs as needed according to the shapes
* of the bottom (input) blobs, as well as reshaping any internal buffers
* and making any other necessary adjustments so that the layer can
- * accomodate the bottom blobs.
+ * accommodate the bottom blobs.
*/
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) = 0;
+ const vector<Blob<Dtype>*>& top) = 0;
/**
* @brief Given the bottom blobs, compute the top blobs and the loss.
*
* Your layer should implement Forward_cpu and (optionally) Forward_gpu.
*/
+#ifdef USE_PROFILING
+ Dtype Forward(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+#else
inline Dtype Forward(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top);
+ const vector<Blob<Dtype>*>& top);
+#endif //USE_PROFILING
/**
* @brief Given the top blob error gradients, compute the bottom blob error
* (Backward_cpu or Backward_gpu) to compute the bottom blob diffs given the
* top blob diffs.
*
- * Your layer should implement Forward_cpu and (optionally) Forward_gpu.
+ * Your layer should implement Backward_cpu and (optionally) Backward_gpu.
*/
inline void Backward(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
- vector<Blob<Dtype>*>* bottom);
+ const vector<Blob<Dtype>*>& bottom);
/**
* @brief Returns the vector of learnable parameter blobs.
}
/**
- * @brief Returns the layer type as an enum value.
+ * @brief Returns the layer type.
*/
- virtual inline LayerParameter_LayerType type() const {
- return LayerParameter_LayerType_NONE;
- }
-
- /**
- * @brief Returns the layer type name.
- */
- virtual inline const string& type_name() const {
- return LayerParameter_LayerType_Name(type());
- }
+ virtual inline const char* type() const { return ""; }
/**
* @brief Returns the exact number of bottom blobs required by the layer,
}
param_propagate_down_[param_id] = value;
}
+
+#ifdef USE_PROFILING
+
+#ifdef LAYER_PERF_STAT
+ const vector<Blob<Dtype>*> * saved_top;
+ const vector<Blob<Dtype>*> * saved_bottom;
+ perf_stat * get_time_stat(void) { return &time_stat_;}
+ perf_stat * get_pmu_stat(int index) { return &pmu_stat_[index];}
+
+#endif
+
+#endif //USE_PROFILING
protected:
/** The protobuf that stores the layer parameters */
LayerParameter layer_param_;
+ /** The phase: TRAIN or TEST */
+ Phase phase_;
/** The vector that stores the learnable parameters as a set of blobs. */
vector<shared_ptr<Blob<Dtype> > > blobs_;
/** Vector indicating whether to compute the diff of each param blob. */
/** @brief Using the CPU device, compute the layer output. */
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) = 0;
+ const vector<Blob<Dtype>*>& top) = 0;
/**
* @brief Using the GPU device, compute the layer output.
* Fall back to Forward_cpu() if unavailable.
*/
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
+ const vector<Blob<Dtype>*>& top) {
// LOG(WARNING) << "Using CPU code as backup.";
return Forward_cpu(bottom, top);
}
*/
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
- vector<Blob<Dtype>*>* bottom) = 0;
+ const vector<Blob<Dtype>*>& bottom) = 0;
/**
* @brief Using the GPU device, compute the gradients for any parameters and
* for the bottom blobs if propagate_down is true.
*/
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
- vector<Blob<Dtype>*>* bottom) {
+ const vector<Blob<Dtype>*>& bottom) {
// LOG(WARNING) << "Using CPU code as backup.";
Backward_cpu(top, propagate_down, bottom);
}
const vector<Blob<Dtype>*>& top) {
if (ExactNumBottomBlobs() >= 0) {
CHECK_EQ(ExactNumBottomBlobs(), bottom.size())
- << type_name() << " Layer takes " << ExactNumBottomBlobs()
+ << type() << " Layer takes " << ExactNumBottomBlobs()
<< " bottom blob(s) as input.";
}
if (MinBottomBlobs() >= 0) {
CHECK_LE(MinBottomBlobs(), bottom.size())
- << type_name() << " Layer takes at least " << MinBottomBlobs()
+ << type() << " Layer takes at least " << MinBottomBlobs()
<< " bottom blob(s) as input.";
}
if (MaxBottomBlobs() >= 0) {
CHECK_GE(MaxBottomBlobs(), bottom.size())
- << type_name() << " Layer takes at most " << MaxBottomBlobs()
+ << type() << " Layer takes at most " << MaxBottomBlobs()
<< " bottom blob(s) as input.";
}
if (ExactNumTopBlobs() >= 0) {
CHECK_EQ(ExactNumTopBlobs(), top.size())
- << type_name() << " Layer produces " << ExactNumTopBlobs()
+ << type() << " Layer produces " << ExactNumTopBlobs()
<< " top blob(s) as output.";
}
if (MinTopBlobs() >= 0) {
CHECK_LE(MinTopBlobs(), top.size())
- << type_name() << " Layer produces at least " << MinTopBlobs()
+ << type() << " Layer produces at least " << MinTopBlobs()
<< " top blob(s) as output.";
}
if (MaxTopBlobs() >= 0) {
CHECK_GE(MaxTopBlobs(), top.size())
- << type_name() << " Layer produces at most " << MaxTopBlobs()
+ << type() << " Layer produces at most " << MaxTopBlobs()
<< " top blob(s) as output.";
}
if (EqualNumBottomTopBlobs()) {
CHECK_EQ(bottom.size(), top.size())
- << type_name() << " Layer produces one top blob as output for each "
+ << type() << " Layer produces one top blob as output for each "
<< "bottom blob input.";
}
}
* Called by SetUp to initialize the weights associated with any top blobs in
* the loss function. Store non-zero loss weights in the diff blob.
*/
-  inline void SetLossWeights(vector<Blob<Dtype>*>* top) {
+  /**
+   * For each top blob with a non-zero loss_weight in the layer parameters,
+   * records the weight via set_loss() and fills the blob's diff with that
+   * weight, so Forward() can fold it into the scalar loss with a dot product
+   * against the blob's data.
+   */
+  inline void SetLossWeights(const vector<Blob<Dtype>*>& top) {
    const int num_loss_weights = layer_param_.loss_weight_size();
    if (num_loss_weights) {
-      CHECK_EQ(top->size(), num_loss_weights) << "loss_weight must be "
+      CHECK_EQ(top.size(), num_loss_weights) << "loss_weight must be "
          "unspecified or specified once per top blob.";
-      for (int top_id = 0; top_id < top->size(); ++top_id) {
+      for (int top_id = 0; top_id < top.size(); ++top_id) {
        const Dtype loss_weight = layer_param_.loss_weight(top_id);
+        // Weight of zero means this top does not contribute to the loss.
        if (loss_weight == Dtype(0)) { continue; }
        this->set_loss(top_id, loss_weight);
-        const int count = (*top)[top_id]->count();
-        Dtype* loss_multiplier = (*top)[top_id]->mutable_cpu_diff();
+        const int count = top[top_id]->count();
+        Dtype* loss_multiplier = top[top_id]->mutable_cpu_diff();
+        // Broadcast the scalar weight across the whole diff blob.
        caffe_set(count, loss_weight, loss_multiplier);
      }
    }
  }
+ private:
DISABLE_COPY_AND_ASSIGN(Layer);
+
+#ifdef USE_PROFILING
+#ifdef LAYER_PERF_STAT
+ perf_stat time_stat_;
+ perf_stat pmu_stat_[16];
+#endif
+#endif //USE_PROFILING
}; // class Layer
+
+#ifndef LAYER_PERF_STAT
// Forward and backward wrappers. You should implement the cpu and
// gpu specific implementations instead, and should not change these
// functions.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
+ const vector<Blob<Dtype>*>& top) {
Dtype loss = 0;
+ Reshape(bottom, top);
switch (Caffe::mode()) {
case Caffe::CPU:
Forward_cpu(bottom, top);
- for (int top_id = 0; top_id < top->size(); ++top_id) {
+ for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
- const int count = (*top)[top_id]->count();
- const Dtype* data = (*top)[top_id]->cpu_data();
- const Dtype* loss_weights = (*top)[top_id]->cpu_diff();
+ const int count = top[top_id]->count();
+ const Dtype* data = top[top_id]->cpu_data();
+ const Dtype* loss_weights = top[top_id]->cpu_diff();
loss += caffe_cpu_dot(count, data, loss_weights);
}
break;
case Caffe::GPU:
Forward_gpu(bottom, top);
#ifndef CPU_ONLY
- for (int top_id = 0; top_id < top->size(); ++top_id) {
+ for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
- const int count = (*top)[top_id]->count();
- const Dtype* data = (*top)[top_id]->gpu_data();
- const Dtype* loss_weights = (*top)[top_id]->gpu_diff();
+ const int count = top[top_id]->count();
+ const Dtype* data = top[top_id]->gpu_data();
+ const Dtype* loss_weights = top[top_id]->gpu_diff();
Dtype blob_loss = 0;
caffe_gpu_dot(count, data, loss_weights, &blob_loss);
loss += blob_loss;
return loss;
}
+#endif
+
template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
- vector<Blob<Dtype>*>* bottom) {
+ const vector<Blob<Dtype>*>& bottom) {
switch (Caffe::mode()) {
case Caffe::CPU:
Backward_cpu(top, propagate_down, bottom);
}
}
-// The layer factory function
-template <typename Dtype>
-Layer<Dtype>* GetLayer(const LayerParameter& param);
-
} // namespace caffe
#endif // CAFFE_LAYER_H_