From d06352537c5f6f38897fb30119bad9bacb37ce79 Mon Sep 17 00:00:00 2001
From: honggui <hongguiyao@msn.com>
Date: Fri, 20 Oct 2017 14:34:09 +0800
Subject: [PATCH] 1. Support ACL v17.10 2. Increase pooling performance 30X
 3. Remove the COARSE timer (use CLOCK_MONOTONIC)

---
 Makefile.config.acl                    |  2 +-
 src/caffe/layer.cpp                    |  2 +-
 src/caffe/layers/acl_pooling_layer.cpp | 16 ++++++----------
 unit_tests/Makefile                    |  2 +-
 4 files changed, 9 insertions(+), 13 deletions(-)
diff --git a/Makefile.config.acl b/Makefile.config.acl
index b30759f..30669c2 100644
--- a/Makefile.config.acl
+++ b/Makefile.config.acl
@@ -15,7 +15,7 @@ ACL_INCS :=$(ACL_ROOT)/include
 ACL_INCS +=$(ACL_ROOT)
 ACL_LIBS_DIR :=$(ACL_ROOT)/build
 ACL_LIBS_DIR +=$(ACL_ROOT)/build/arm_compute
-ACL_LIBS :=arm_compute OpenCL
+ACL_LIBS :=arm_compute arm_compute_core OpenCL
 
 # uncomment to disable IO dependencies and corresponding data layers
 # USE_OPENCV := 0
diff --git a/src/caffe/layer.cpp b/src/caffe/layer.cpp
index 677ed10..882b86a 100644
--- a/src/caffe/layer.cpp
+++ b/src/caffe/layer.cpp
@@ -20,7 +20,7 @@ unsigned long get_cur_time(void)
 {
    struct timespec tm;
 
-   clock_gettime(CLOCK_MONOTONIC_COARSE, &tm);
+   clock_gettime(CLOCK_MONOTONIC, &tm);
 
    return (tm.tv_sec*1000000+tm.tv_nsec/1000);
 }
diff --git a/src/caffe/layers/acl_pooling_layer.cpp b/src/caffe/layers/acl_pooling_layer.cpp
index eb1c5da..f72b223 100644
--- a/src/caffe/layers/acl_pooling_layer.cpp
+++ b/src/caffe/layers/acl_pooling_layer.cpp
@@ -15,8 +15,8 @@ template <typename Dtype>
 void ACLPoolingLayer<Dtype>::SetupACLLayer(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top){
 
-    TensorShape in_shape ((unsigned int)this->width_, (unsigned int)this->height_);
-    TensorShape out_shape((unsigned int)this->pooled_width_, (unsigned int)this->pooled_height_);
+    TensorShape in_shape ((unsigned int)this->width_, (unsigned int)this->height_,(unsigned int)this->channels_);
+    TensorShape out_shape((unsigned int)this->pooled_width_, (unsigned int)this->pooled_height_,(unsigned int)this->channels_);
     checkreshape(in_shape,Caffe::arm_gpu_mode());
     if (!this->init_layer_) return;
     this->init_layer_=false;
@@ -93,13 +93,11 @@ void ACLPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
   }
   SetupACLLayer(bottom,top);
   for (int n = 0; n < bottom[0]->num(); ++n) {
-    for (int c = 0; c < this->channels_; ++c) {
         tensor_mem(this->cpu().input,(void*)(bottom_data));
         cpu_run();
         tensor_mem((void*)(top_data),this->cpu().output);
-        bottom_data += bottom[0]->offset(0, 1);
-        top_data += top[0]->offset(0, 1);
-    }
+        bottom_data += bottom[0]->offset(1);
+        top_data += top[0]->offset(1);
   }
 }
 
@@ -130,13 +128,11 @@ void ACLPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
   }
   SetupACLLayer(bottom,top);
   for (int n = 0; n < bottom[0]->num(); ++n) {
-    for (int c = 0; c < this->channels_; ++c) {
         tensor_mem(this->gpu().input,(void*)(bottom_data));
         gpu_run();
         tensor_mem((void*)(top_data),this->gpu().output);
-        bottom_data += bottom[0]->offset(0, 1);
-        top_data += top[0]->offset(0, 1);
-    }
+        bottom_data += bottom[0]->offset(1);
+        top_data += top[0]->offset(1);
   }
 }
 
diff --git a/unit_tests/Makefile b/unit_tests/Makefile
index 034ee02..4e64c0c 100644
--- a/unit_tests/Makefile
+++ b/unit_tests/Makefile
@@ -16,7 +16,7 @@ HOME=/home/firefly
 #  caffe related stuff
 #
 
-CAFFE_ROOT=$(HOME)/caffeOnACL
+CAFFE_ROOT=$(HOME)/CaffeOnACL
 CAFFE_INCS = -I$(CAFFE_ROOT)/include -I$(CAFFE_ROOT)/distribute/include/
 CAFFE_LIBS = -L$(CAFFE_ROOT)/distribute/lib -lcaffe  -lglog -lgflags -lprotobuf -lboost_system -lboost_filesystem
 CAFFE_RPATH =$(CAFFE_ROOT)/distribute/lib
-- 
2.7.4