From d06352537c5f6f38897fb30119bad9bacb37ce79 Mon Sep 17 00:00:00 2001 From: honggui Date: Fri, 20 Oct 2017 14:34:09 +0800 Subject: [PATCH] 1. support ACL v17.10 2. the performance of pooling was increased 30X 3. Removing COARSE timer --- Makefile.config.acl | 2 +- src/caffe/layer.cpp | 2 +- src/caffe/layers/acl_pooling_layer.cpp | 16 ++++++---------- unit_tests/Makefile | 2 +- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/Makefile.config.acl b/Makefile.config.acl index b30759f..30669c2 100644 --- a/Makefile.config.acl +++ b/Makefile.config.acl @@ -15,7 +15,7 @@ ACL_INCS :=$(ACL_ROOT)/include ACL_INCS +=$(ACL_ROOT) ACL_LIBS_DIR :=$(ACL_ROOT)/build ACL_LIBS_DIR +=$(ACL_ROOT)/build/arm_compute -ACL_LIBS :=arm_compute OpenCL +ACL_LIBS :=arm_compute arm_compute_core OpenCL # uncomment to disable IO dependencies and corresponding data layers # USE_OPENCV := 0 diff --git a/src/caffe/layer.cpp b/src/caffe/layer.cpp index 677ed10..882b86a 100644 --- a/src/caffe/layer.cpp +++ b/src/caffe/layer.cpp @@ -20,7 +20,7 @@ unsigned long get_cur_time(void) { struct timespec tm; - clock_gettime(CLOCK_MONOTONIC_COARSE, &tm); + clock_gettime(CLOCK_MONOTONIC, &tm); return (tm.tv_sec*1000000+tm.tv_nsec/1000); } diff --git a/src/caffe/layers/acl_pooling_layer.cpp b/src/caffe/layers/acl_pooling_layer.cpp index eb1c5da..f72b223 100644 --- a/src/caffe/layers/acl_pooling_layer.cpp +++ b/src/caffe/layers/acl_pooling_layer.cpp @@ -15,8 +15,8 @@ template void ACLPoolingLayer::SetupACLLayer(const vector*>& bottom, const vector*>& top){ - TensorShape in_shape ((unsigned int)this->width_, (unsigned int)this->height_); - TensorShape out_shape((unsigned int)this->pooled_width_, (unsigned int)this->pooled_height_); + TensorShape in_shape ((unsigned int)this->width_, (unsigned int)this->height_,(unsigned int)this->channels_); + TensorShape out_shape((unsigned int)this->pooled_width_, (unsigned int)this->pooled_height_,(unsigned int)this->channels_); 
checkreshape(in_shape,Caffe::arm_gpu_mode()); if (!this->init_layer_) return; this->init_layer_=false; @@ -93,13 +93,11 @@ void ACLPoolingLayer::Forward_cpu(const vector*>& bottom, } SetupACLLayer(bottom,top); for (int n = 0; n < bottom[0]->num(); ++n) { - for (int c = 0; c < this->channels_; ++c) { tensor_mem(this->cpu().input,(void*)(bottom_data)); cpu_run(); tensor_mem((void*)(top_data),this->cpu().output); - bottom_data += bottom[0]->offset(0, 1); - top_data += top[0]->offset(0, 1); - } + bottom_data += bottom[0]->offset(1); + top_data += top[0]->offset(1); } } @@ -130,13 +128,11 @@ void ACLPoolingLayer::Forward_gpu(const vector*>& bottom, } SetupACLLayer(bottom,top); for (int n = 0; n < bottom[0]->num(); ++n) { - for (int c = 0; c < this->channels_; ++c) { tensor_mem(this->gpu().input,(void*)(bottom_data)); gpu_run(); tensor_mem((void*)(top_data),this->gpu().output); - bottom_data += bottom[0]->offset(0, 1); - top_data += top[0]->offset(0, 1); - } + bottom_data += bottom[0]->offset(1); + top_data += top[0]->offset(1); } } diff --git a/unit_tests/Makefile b/unit_tests/Makefile index 034ee02..4e64c0c 100644 --- a/unit_tests/Makefile +++ b/unit_tests/Makefile @@ -16,7 +16,7 @@ HOME=/home/firefly # caffe related stuff # -CAFFE_ROOT=$(HOME)/caffeOnACL +CAFFE_ROOT=$(HOME)/CaffeOnACL CAFFE_INCS = -I$(CAFFE_ROOT)/include -I$(CAFFE_ROOT)/distribute/include/ CAFFE_LIBS = -L$(CAFFE_ROOT)/distribute/lib -lcaffe -lglog -lgflags -lprotobuf -lboost_system -lboost_filesystem CAFFE_RPATH =$(CAFFE_ROOT)/distribute/lib -- 2.7.4