1. support ACL v17.10
author honggui <hongguiyao@msn.com>
Fri, 20 Oct 2017 06:34:09 +0000 (14:34 +0800)
committer honggui <hongguiyao@msn.com>
Fri, 20 Oct 2017 06:34:09 +0000 (14:34 +0800)
2. the performance of pooling was increased 30X
3. Removing COARSE timer

Makefile.config.acl
src/caffe/layer.cpp
src/caffe/layers/acl_pooling_layer.cpp
unit_tests/Makefile

index b30759f..30669c2 100644 (file)
@@ -15,7 +15,7 @@ ACL_INCS :=$(ACL_ROOT)/include
 ACL_INCS +=$(ACL_ROOT)
 ACL_LIBS_DIR :=$(ACL_ROOT)/build
 ACL_LIBS_DIR +=$(ACL_ROOT)/build/arm_compute
-ACL_LIBS :=arm_compute OpenCL
+ACL_LIBS :=arm_compute arm_compute_core OpenCL
 
 # uncomment to disable IO dependencies and corresponding data layers
 # USE_OPENCV := 0
index 677ed10..882b86a 100644 (file)
@@ -20,7 +20,7 @@ unsigned long get_cur_time(void)
 {
    struct timespec tm;
 
-   clock_gettime(CLOCK_MONOTONIC_COARSE, &tm);
+   clock_gettime(CLOCK_MONOTONIC, &tm);
 
    return (tm.tv_sec*1000000+tm.tv_nsec/1000);
 }
index eb1c5da..f72b223 100644 (file)
@@ -15,8 +15,8 @@ template <typename Dtype>
 void ACLPoolingLayer<Dtype>::SetupACLLayer(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top){
 
-    TensorShape in_shape ((unsigned int)this->width_, (unsigned int)this->height_);
-    TensorShape out_shape((unsigned int)this->pooled_width_, (unsigned int)this->pooled_height_);
+    TensorShape in_shape ((unsigned int)this->width_, (unsigned int)this->height_,(unsigned int)this->channels_);
+    TensorShape out_shape((unsigned int)this->pooled_width_, (unsigned int)this->pooled_height_,(unsigned int)this->channels_);
     checkreshape(in_shape,Caffe::arm_gpu_mode());
     if (!this->init_layer_) return;
     this->init_layer_=false;
@@ -93,13 +93,11 @@ void ACLPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
   }
   SetupACLLayer(bottom,top);
   for (int n = 0; n < bottom[0]->num(); ++n) {
-    for (int c = 0; c < this->channels_; ++c) {
         tensor_mem(this->cpu().input,(void*)(bottom_data));
         cpu_run();
         tensor_mem((void*)(top_data),this->cpu().output);
-        bottom_data += bottom[0]->offset(0, 1);
-        top_data += top[0]->offset(0, 1);
-    }
+        bottom_data += bottom[0]->offset(1);
+        top_data += top[0]->offset(1);
   }
 }
 
@@ -130,13 +128,11 @@ void ACLPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
   }
   SetupACLLayer(bottom,top);
   for (int n = 0; n < bottom[0]->num(); ++n) {
-    for (int c = 0; c < this->channels_; ++c) {
         tensor_mem(this->gpu().input,(void*)(bottom_data));
         gpu_run();
         tensor_mem((void*)(top_data),this->gpu().output);
-        bottom_data += bottom[0]->offset(0, 1);
-        top_data += top[0]->offset(0, 1);
-    }
+        bottom_data += bottom[0]->offset(1);
+        top_data += top[0]->offset(1);
   }
 }
 
index 034ee02..4e64c0c 100644 (file)
@@ -16,7 +16,7 @@ HOME=/home/firefly
 #  caffe related stuff
 #
 
-CAFFE_ROOT=$(HOME)/caffeOnACL
+CAFFE_ROOT=$(HOME)/CaffeOnACL
 CAFFE_INCS = -I$(CAFFE_ROOT)/include -I$(CAFFE_ROOT)/distribute/include/
 CAFFE_LIBS = -L$(CAFFE_ROOT)/distribute/lib -lcaffe  -lglog -lgflags -lprotobuf -lboost_system -lboost_filesystem
 CAFFE_RPATH =$(CAFFE_ROOT)/distribute/lib