#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/math_functions.hpp"
-
+#ifdef USE_PROFILING
+#include <stdint.h>
+#include <sys/time.h>
+
+// Seconds-to-microseconds factor (1e6): timeval carries tv_sec plus
+// tv_usec, so all timings below are in microseconds despite the name.
+#define NANO_SEC_CONV 1000000
+
+// Global bitmask selecting which profiling categories print timings.
+extern unsigned int acl_log_flags;
+#endif //USE_PROFILING
/**
Forward declare boost::thread instead of including boost/thread.hpp
 to avoid boost/NVCC issues (#1009, #1010) on OSX.
namespace boost { class mutex; }
namespace caffe {
+bool AclEnableSchedule(int enable=1);
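+// Minimal usage sketch (illustrative: the 0x1 category bit is a
+// hypothetical value; the real mask constants are defined elsewhere):
+//
+//   caffe::AclEnableSchedule(1);  // presumably toggles ACL's scheduler
+//   acl_log_flags = 0x1;          // emit timings for one category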
+#ifdef USE_PROFILING
+// Scoped timer: if the given mask bit is set in acl_log_flags, the
+// constructor records the start time and the destructor prints
+// "<label> <elapsed seconds>".
+class logtime_util
+{
+ public:
+  logtime_util(int mask_, const char* information_){
+    mask = mask_;
+    if(acl_log_flags & mask){
+      strncpy(information, information_, 255);
+      information[255] = '\0';  // strncpy does not terminate when truncating
+      gettimeofday(&tv[0], NULL);
+    }
+  }
+  ~logtime_util(){
+    if(acl_log_flags & mask){
+      int64_t time[2];  // microseconds; tv_sec * 1e6 overflows a 32-bit long
+      gettimeofday(&tv[1], NULL);
+      time[0] = (int64_t)tv[0].tv_sec * NANO_SEC_CONV + tv[0].tv_usec;
+      time[1] = (int64_t)tv[1].tv_sec * NANO_SEC_CONV + tv[1].tv_usec;
+      printf("%s %.6lf\n", information, (((double)time[1] - time[0]) / NANO_SEC_CONV));
+    }
+  }
+  // Restart (start == true) or report (start == false) the timer in place.
+  void log_time(bool start)
+  {
+    if(acl_log_flags & mask){
+      if (start){
+        gettimeofday(&tv[0], NULL);
+      }
+      else{
+        int64_t time[2];
+        gettimeofday(&tv[1], NULL);
+        time[0] = (int64_t)tv[0].tv_sec * NANO_SEC_CONV + tv[0].tv_usec;
+        time[1] = (int64_t)tv[1].tv_sec * NANO_SEC_CONV + tv[1].tv_usec;
+        printf("%s %.6lf\n", information, (((double)time[1] - time[0]) / NANO_SEC_CONV));
+      }
+    }
+  }
+ private:
+ struct timeval tv[2];
+ int mask;
+ char information[256];
+};
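+// Usage sketch (illustrative; MASK_CONV stands in for a real bit of
+// acl_log_flags, which is defined outside this header):
+//
+//   {
+//     logtime_util log_time(MASK_CONV, "conv forward:");
+//     // ... timed work; "conv forward: <seconds>" prints at scope exit
+//   }
+//
+// log_time(true) / log_time(false) restart and report the same timer
+// within a scope.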
+
+#ifdef LAYER_PERF_STAT
+
+struct perf_stat {
+  uint64_t total;
+  uint32_t start;
+  uint32_t end;
+  uint32_t used;
+  uint32_t count;
+
+  perf_stat(): total(0), start(0), end(0), used(0), count(0) {};
+};
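+// Interpretation sketch (an assumed convention, not defined here: 'used'
+// holds the latest sample and 'total'/'count' accumulate the history):
+//
+//   perf_stat st;
+//   st.used   = end_us - start_us;  // one measured interval
+//   st.total += st.used;
+//   st.count++;
+//   double avg = st.count ? (double)st.total / st.count : 0.0;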
+
+#endif
+#endif //USE_PROFILING
/**
* @brief An interface for the units of computation which can be composed into a
* layer.
*/
explicit Layer(const LayerParameter& param)
- : layer_param_(param), is_shared_(false) {
+ : layer_param_(param) {
// Set phase and copy blobs (if there are any).
phase_ = param.phase();
if (layer_param_.blobs_size() > 0) {
*/
void SetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
- InitMutex();
CheckBlobCounts(bottom, top);
LayerSetUp(bottom, top);
Reshape(bottom, top);
const vector<Blob<Dtype>*>& top) {}
/**
- * @brief Whether a layer should be shared by multiple nets during data
- * parallelism. By default, all layers except for data layers should
- * not be shared. data layers should be shared to ensure each worker
- * solver access data sequentially during data parallelism.
- */
- virtual inline bool ShareInParallel() const { return false; }
-
- /** @brief Return whether this layer is actually shared by other nets.
- * If ShareInParallel() is true and using more than one GPU and the
- * net has TRAIN phase, then this function is expected return true.
- */
- inline bool IsShared() const { return is_shared_; }
-
- /** @brief Set whether this layer is actually shared by other nets
- * If ShareInParallel() is true and using more than one GPU and the
- * net has TRAIN phase, then is_shared should be set true.
- */
- inline void SetShared(bool is_shared) {
- CHECK(ShareInParallel() || !is_shared)
- << type() << "Layer does not support sharing.";
- is_shared_ = is_shared;
- }
-
- /**
* @brief Adjust the shapes of top blobs and internal buffers to accommodate
* the shapes of the bottom blobs.
*
*
* Your layer should implement Forward_cpu and (optionally) Forward_gpu.
*/
+#ifdef USE_PROFILING
+ Dtype Forward(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+#else
inline Dtype Forward(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
+#endif //USE_PROFILING
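+  // Under USE_PROFILING this wrapper is defined out of line (in layer.cpp)
+  // so it can be instrumented. A sketch of the idea (not the actual
+  // definition; MASK_LAYER is a hypothetical flag bit):
+  //
+  //   template <typename Dtype>
+  //   Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
+  //       const vector<Blob<Dtype>*>& top) {
+  //     logtime_util log_time(MASK_LAYER, this->type());
+  //     // ... then the same Reshape + mode dispatch as the inline version
+  //   }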
/**
* @brief Given the top blob error gradients, compute the bottom blob error
}
param_propagate_down_[param_id] = value;
}
+
+#ifdef USE_PROFILING
+
+#ifdef LAYER_PERF_STAT
+ const vector<Blob<Dtype>*> * saved_top;
+ const vector<Blob<Dtype>*> * saved_bottom;
+ perf_stat * get_time_stat(void) { return &time_stat_;}
+ perf_stat * get_pmu_stat(int index) { return &pmu_stat_[index];}
+
+#endif
+
+#endif //USE_PROFILING
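+  // Reading the counters after a run (a sketch; assumes the profiled
+  // Forward folds elapsed time, presumably microseconds, into time_stat_):
+  //
+  //   perf_stat* st = layer->get_time_stat();
+  //   if (st->count)
+  //     LOG(INFO) << layer->type() << ": avg "
+  //               << (double)st->total / st->count;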
protected:
/** The protobuf that stores the layer parameters */
}
private:
- /** Whether this layer is actually shared by other nets*/
- bool is_shared_;
-
- /** The mutex for sequential forward if this layer is shared */
- shared_ptr<boost::mutex> forward_mutex_;
-
- /** Initialize forward_mutex_ */
- void InitMutex();
- /** Lock forward_mutex_ if this layer is shared */
- void Lock();
- /** Unlock forward_mutex_ if this layer is shared */
- void Unlock();
-
DISABLE_COPY_AND_ASSIGN(Layer);
+
+#ifdef USE_PROFILING
+#ifdef LAYER_PERF_STAT
+ perf_stat time_stat_;
+ perf_stat pmu_stat_[16];
+#endif
+#endif //USE_PROFILING
}; // class Layer
+
+#ifndef LAYER_PERF_STAT
// Forward and backward wrappers. You should implement the cpu and
// gpu specific implementations instead, and should not change these
// functions.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
- // Lock during forward to ensure sequential forward
- Lock();
Dtype loss = 0;
Reshape(bottom, top);
switch (Caffe::mode()) {
default:
LOG(FATAL) << "Unknown caffe mode.";
}
- Unlock();
return loss;
}
+#endif
+
template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,