#include "caffe/blob.hpp"
#include "caffe/common.hpp"
+#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"
-#include "caffe/util/device_alternate.hpp"
+#include "caffe/util/math_functions.hpp"
+#ifdef USE_PROFILING
+#include <sys/time.h>
+#define NANO_SEC_CONV 1000000
+extern unsigned int acl_log_flags;
+#endif //USE_PROFILING
+/**
+ Forward declare boost::thread instead of including boost/thread.hpp
+ to avoid boost/NVCC issues (#1009, #1010) on OSX.
+ */
+namespace boost { class mutex; }
namespace caffe {
+bool AclEnableSchedule(int enable=1);
+#ifdef USE_PROFILING
+// Scoped profiling timer. When (acl_log_flags & mask) is set, measures the
+// wall-clock time between construction and destruction (or between
+// log_time(true) and log_time(false)) and prints "<label> <seconds>".
+// NOTE(review): despite its name, NANO_SEC_CONV (1e6) is microseconds-per-
+// second; the printed value is in seconds.
+class logtime_util
+{
+ public:
+  // mask_:        bit(s) tested against the global acl_log_flags to enable
+  //               this timer.
+  // information_: label printed with the measurement (truncated to 255 chars).
+  logtime_util(int mask_, const char* information_){
+    mask = mask_;
+    if(acl_log_flags & mask){
+      strncpy(information, information_, 255);
+      // strncpy does not NUL-terminate when strlen(information_) >= 255;
+      // terminate explicitly so printf("%s", ...) below is always safe.
+      information[255] = '\0';
+      gettimeofday(&tv[0], NULL);
+    }
+  }
+  ~logtime_util(){
+    if(acl_log_flags & mask){
+      print_elapsed();
+    }
+  }
+  // log_time(true) restarts the timer; log_time(false) stops it and prints
+  // the elapsed time since the last start.
+  void log_time(bool start)
+  {
+    if(acl_log_flags & mask){
+      if (start){
+        gettimeofday(&tv[0], NULL);
+      }
+      else{
+        print_elapsed();
+      }
+    }
+  }
+private:
+  // Take the end timestamp and print "<label> <elapsed seconds>".
+  // Shared by the destructor and log_time(false).
+  void print_elapsed(){
+    long time[2];
+    gettimeofday(&tv[1], NULL);
+    time[0] = tv[0].tv_sec * NANO_SEC_CONV + tv[0].tv_usec;
+    time[1] = tv[1].tv_sec * NANO_SEC_CONV + tv[1].tv_usec;
+    printf("%s %.6lf\n", information, (((double)time[1] - time[0]) / NANO_SEC_CONV));
+  }
+  struct timeval tv[2];   // tv[0] = start timestamp, tv[1] = end timestamp
+  int mask;               // enable bit(s), tested against acl_log_flags
+  char information[256];  // NUL-terminated label for the printed line
+};
+
+#ifdef LAYER_PERF_STAT
+
+// Per-layer timing statistics accumulator (used when LAYER_PERF_STAT is on).
+struct perf_stat {
+  uint64_t total;  // accumulated time over all counted invocations
+  uint32_t start;  // most recent start timestamp -- TODO confirm units at call sites
+  uint32_t end;    // most recent end timestamp -- TODO confirm units at call sites
+  uint32_t used;   // presumably elapsed time of the current invocation; verify against users
+  uint32_t count;  // number of invocations folded into total
+
+  // Zero-initialize every member; the previous initializer list omitted
+  // 'used', leaving it indeterminate.
+  perf_stat(): total(0), start(0), end(0), used(0), count(0) {}
+};
+
+
+#endif
+#endif //USE_PROFILING
/**
* @brief An interface for the units of computation which can be composed into a
* Net.
*
- * Layer&s must implement a Forward function, in which they take their input
- * (bottom) Blob&s (if any) and compute their output Blob&s (if any).
+ * Layer%s must implement a Forward function, in which they take their input
+ * (bottom) Blob%s (if any) and compute their output Blob%s (if any).
* They may also implement a Backward function, in which they compute the error
- * gradients with respect to their input Blob&s, given the error gradients with
- * their output Blob&s.
+ * gradients with respect to their input Blob%s, given the error gradients with
+ * their output Blob%s.
*/
template <typename Dtype>
class Layer {
*/
explicit Layer(const LayerParameter& param)
: layer_param_(param) {
- // The only thing we do is to copy blobs if there are any.
+ // Set phase and copy blobs (if there are any).
+ phase_ = param.phase();
if (layer_param_.blobs_size() > 0) {
blobs_.resize(layer_param_.blobs_size());
for (int i = 0; i < layer_param_.blobs_size(); ++i) {
* Sets up the loss weight multiplier blobs for any non-zero loss weights.
* This method may not be overridden.
*/
- void SetUp(const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
- CheckBlobCounts(bottom, *top);
+ void SetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ CheckBlobCounts(bottom, top);
LayerSetUp(bottom, top);
Reshape(bottom, top);
SetLossWeights(top);
* adjust the top blob sizes.
*/
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {}
+ const vector<Blob<Dtype>*>& top) {}
/**
- * @brief Adjust the shapes of top blobs and internal buffers to accomodate
+ * @brief Adjust the shapes of top blobs and internal buffers to accommodate
* the shapes of the bottom blobs.
*
* @param bottom the input blobs, with the requested input shapes
* This method should reshape top blobs as needed according to the shapes
* of the bottom (input) blobs, as well as reshaping any internal buffers
* and making any other necessary adjustments so that the layer can
- * accomodate the bottom blobs.
+ * accommodate the bottom blobs.
*/
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) = 0;
+ const vector<Blob<Dtype>*>& top) = 0;
/**
* @brief Given the bottom blobs, compute the top blobs and the loss.
*
* Your layer should implement Forward_cpu and (optionally) Forward_gpu.
*/
+#ifdef USE_PROFILING
+ Dtype Forward(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+#else
inline Dtype Forward(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top);
+ const vector<Blob<Dtype>*>& top);
+#endif //USE_PROFILING
/**
* @brief Given the top blob error gradients, compute the bottom blob error
* (Backward_cpu or Backward_gpu) to compute the bottom blob diffs given the
* top blob diffs.
*
- * Your layer should implement Forward_cpu and (optionally) Forward_gpu.
+ * Your layer should implement Backward_cpu and (optionally) Backward_gpu.
*/
inline void Backward(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
- vector<Blob<Dtype>*>* bottom);
+ const vector<Blob<Dtype>*>& bottom);
/**
* @brief Returns the vector of learnable parameter blobs.
}
/**
- * @brief Returns the layer type as an enum value.
+ * @brief Returns the layer type.
*/
- virtual inline LayerParameter_LayerType type() const {
- return LayerParameter_LayerType_NONE;
- }
-
- /**
- * @brief Returns the layer type name.
- */
- virtual inline const string& type_name() const {
- return LayerParameter_LayerType_Name(type());
- }
+ virtual inline const char* type() const { return ""; }
/**
* @brief Returns the exact number of bottom blobs required by the layer,
}
param_propagate_down_[param_id] = value;
}
+
+#ifdef USE_PROFILING
+
+#ifdef LAYER_PERF_STAT
+ const vector<Blob<Dtype>*> * saved_top;
+ const vector<Blob<Dtype>*> * saved_bottom;
+ perf_stat * get_time_stat(void) { return &time_stat_;}
+ perf_stat * get_pmu_stat(int index) { return &pmu_stat_[index];}
+
+#endif
+
+#endif //USE_PROFILING
protected:
/** The protobuf that stores the layer parameters */
LayerParameter layer_param_;
+ /** The phase: TRAIN or TEST */
+ Phase phase_;
/** The vector that stores the learnable parameters as a set of blobs. */
vector<shared_ptr<Blob<Dtype> > > blobs_;
/** Vector indicating whether to compute the diff of each param blob. */
/** @brief Using the CPU device, compute the layer output. */
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) = 0;
+ const vector<Blob<Dtype>*>& top) = 0;
/**
* @brief Using the GPU device, compute the layer output.
* Fall back to Forward_cpu() if unavailable.
*/
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
+ const vector<Blob<Dtype>*>& top) {
// LOG(WARNING) << "Using CPU code as backup.";
return Forward_cpu(bottom, top);
}
*/
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
- vector<Blob<Dtype>*>* bottom) = 0;
+ const vector<Blob<Dtype>*>& bottom) = 0;
/**
* @brief Using the GPU device, compute the gradients for any parameters and
* for the bottom blobs if propagate_down is true.
*/
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
- vector<Blob<Dtype>*>* bottom) {
+ const vector<Blob<Dtype>*>& bottom) {
// LOG(WARNING) << "Using CPU code as backup.";
Backward_cpu(top, propagate_down, bottom);
}
const vector<Blob<Dtype>*>& top) {
if (ExactNumBottomBlobs() >= 0) {
CHECK_EQ(ExactNumBottomBlobs(), bottom.size())
- << type_name() << " Layer takes " << ExactNumBottomBlobs()
+ << type() << " Layer takes " << ExactNumBottomBlobs()
<< " bottom blob(s) as input.";
}
if (MinBottomBlobs() >= 0) {
CHECK_LE(MinBottomBlobs(), bottom.size())
- << type_name() << " Layer takes at least " << MinBottomBlobs()
+ << type() << " Layer takes at least " << MinBottomBlobs()
<< " bottom blob(s) as input.";
}
if (MaxBottomBlobs() >= 0) {
CHECK_GE(MaxBottomBlobs(), bottom.size())
- << type_name() << " Layer takes at most " << MaxBottomBlobs()
+ << type() << " Layer takes at most " << MaxBottomBlobs()
<< " bottom blob(s) as input.";
}
if (ExactNumTopBlobs() >= 0) {
CHECK_EQ(ExactNumTopBlobs(), top.size())
- << type_name() << " Layer produces " << ExactNumTopBlobs()
+ << type() << " Layer produces " << ExactNumTopBlobs()
<< " top blob(s) as output.";
}
if (MinTopBlobs() >= 0) {
CHECK_LE(MinTopBlobs(), top.size())
- << type_name() << " Layer produces at least " << MinTopBlobs()
+ << type() << " Layer produces at least " << MinTopBlobs()
<< " top blob(s) as output.";
}
if (MaxTopBlobs() >= 0) {
CHECK_GE(MaxTopBlobs(), top.size())
- << type_name() << " Layer produces at most " << MaxTopBlobs()
+ << type() << " Layer produces at most " << MaxTopBlobs()
<< " top blob(s) as output.";
}
if (EqualNumBottomTopBlobs()) {
CHECK_EQ(bottom.size(), top.size())
- << type_name() << " Layer produces one top blob as output for each "
+ << type() << " Layer produces one top blob as output for each "
<< "bottom blob input.";
}
}
* Called by SetUp to initialize the weights associated with any top blobs in
* the loss function. Store non-zero loss weights in the diff blob.
*/
-  inline void SetLossWeights(vector<Blob<Dtype>*>* top) {
+  /**
+   * For each top blob with a non-zero loss_weight in the layer parameters,
+   * records the weight via set_loss() and fills the blob's diff with that
+   * weight, so Forward() can fold it into the scalar loss with a dot product
+   * against the blob's data.
+   */
+  inline void SetLossWeights(const vector<Blob<Dtype>*>& top) {
    const int num_loss_weights = layer_param_.loss_weight_size();
    if (num_loss_weights) {
-      CHECK_EQ(top->size(), num_loss_weights) << "loss_weight must be "
+      CHECK_EQ(top.size(), num_loss_weights) << "loss_weight must be "
          "unspecified or specified once per top blob.";
-      for (int top_id = 0; top_id < top->size(); ++top_id) {
+      for (int top_id = 0; top_id < top.size(); ++top_id) {
        const Dtype loss_weight = layer_param_.loss_weight(top_id);
+        // Weight of zero means this top does not contribute to the loss.
        if (loss_weight == Dtype(0)) { continue; }
        this->set_loss(top_id, loss_weight);
-        const int count = (*top)[top_id]->count();
-        Dtype* loss_multiplier = (*top)[top_id]->mutable_cpu_diff();
+        const int count = top[top_id]->count();
+        Dtype* loss_multiplier = top[top_id]->mutable_cpu_diff();
+        // Broadcast the scalar weight across the whole diff blob.
        caffe_set(count, loss_weight, loss_multiplier);
      }
    }
  }
+ private:
DISABLE_COPY_AND_ASSIGN(Layer);
+
+#ifdef USE_PROFILING
+#ifdef LAYER_PERF_STAT
+ perf_stat time_stat_;
+ perf_stat pmu_stat_[16];
+#endif
+#endif //USE_PROFILING
}; // class Layer
+
+#ifndef LAYER_PERF_STAT
// Forward and backward wrappers. You should implement the cpu and
// gpu specific implementations instead, and should not change these
// functions.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
- vector<Blob<Dtype>*>* top) {
+ const vector<Blob<Dtype>*>& top) {
Dtype loss = 0;
+ Reshape(bottom, top);
switch (Caffe::mode()) {
case Caffe::CPU:
Forward_cpu(bottom, top);
- for (int top_id = 0; top_id < top->size(); ++top_id) {
+ for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
- const int count = (*top)[top_id]->count();
- const Dtype* data = (*top)[top_id]->cpu_data();
- const Dtype* loss_weights = (*top)[top_id]->cpu_diff();
+ const int count = top[top_id]->count();
+ const Dtype* data = top[top_id]->cpu_data();
+ const Dtype* loss_weights = top[top_id]->cpu_diff();
loss += caffe_cpu_dot(count, data, loss_weights);
}
break;
case Caffe::GPU:
Forward_gpu(bottom, top);
#ifndef CPU_ONLY
- for (int top_id = 0; top_id < top->size(); ++top_id) {
+ for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
- const int count = (*top)[top_id]->count();
- const Dtype* data = (*top)[top_id]->gpu_data();
- const Dtype* loss_weights = (*top)[top_id]->gpu_diff();
+ const int count = top[top_id]->count();
+ const Dtype* data = top[top_id]->gpu_data();
+ const Dtype* loss_weights = top[top_id]->gpu_diff();
Dtype blob_loss = 0;
caffe_gpu_dot(count, data, loss_weights, &blob_loss);
loss += blob_loss;
return loss;
}
+#endif
+
template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
- vector<Blob<Dtype>*>* bottom) {
+ const vector<Blob<Dtype>*>& bottom) {
switch (Caffe::mode()) {
case Caffe::CPU:
Backward_cpu(top, propagate_down, bottom);
}
}
-// The layer factory function
-template <typename Dtype>
-Layer<Dtype>* GetLayer(const LayerParameter& param);
-
} // namespace caffe
#endif // CAFFE_LAYER_H_