#include <vector>

#include "caffe/layers/acl_batch_norm_layer.hpp"
8 template <typename Dtype>
9 void ACLBatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
10 const vector<Blob<Dtype>*>& top) {
11 BatchNormLayer<Dtype>::LayerSetUp(bottom, top);
12 this->force_bypass_acl_path_= bypass_acl_class_layer & FLAGS_ENABLE_ACL_BN;
14 template <typename Dtype>
15 void ACLBatchNormLayer<Dtype>::SetupACLOperator(const vector<Blob<Dtype>*>& bottom,
16 const vector<Blob<Dtype>*>& top){
17 arm_compute::TensorShape in_shape ((unsigned int)bottom[0]->width(), (unsigned int)bottom[0]->height(),(unsigned int)bottom[0]->channels(),(unsigned int)bottom[0]->num());
18 if (is_operator_init_done(in_shape)) return;
19 set_operator_init_done();
22 arm_compute::TensorShape out_shape((unsigned int)top[0]->width(), (unsigned int)top[0]->height(),(unsigned int)top[0]->channels(),(unsigned int)top[0]->num());
23 arm_compute::TensorShape mean_shape((unsigned int)this->channels_);
24 arm_compute::TensorShape var_shape=mean_shape;
25 arm_compute::TensorShape beta_shape=mean_shape;
26 arm_compute::TensorShape gamma_shape=mean_shape;
27 Dtype beta_val[beta_shape.total_size()];
28 Dtype gamma_val[gamma_shape.total_size()];
30 for (int i=0;i<beta_shape.total_size();++i) {
33 for (int i=0;i<gamma_shape.total_size();++i) {
37 new_tensor(input(),in_shape,InputdataPtr(this,bottom));
38 new_tensor(output(),out_shape,OutputdataPtr(this,top));
39 // use the stored mean/variance estimates.
40 const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ?
41 0 : 1 / this->blobs_[2]->cpu_data()[0];
42 caffe_cpu_scale(this->variance_.count(), scale_factor,
43 this->blobs_[0]->cpu_data(), GetDataPtr(this,&this->mean_));
44 caffe_cpu_scale(this->variance_.count(), scale_factor,
45 this->blobs_[1]->cpu_data(), GetDataPtr(this,&this->variance_));
47 new_tensor(mean(),mean_shape,GetDataPtr(this,&this->mean_));
48 new_tensor(var(),var_shape,GetDataPtr(this,&this->variance_));
49 new_tensor(beta(),beta_shape,(void*)beta_val,true);
50 new_tensor(gamma(),gamma_shape,(void*)gamma_val,true);
51 acl_configure(bn,this,this->eps_);
53 template <typename Dtype>
54 void ACLBatchNormLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
55 const vector<Blob<Dtype>*>& top) {
56 BatchNormLayer<Dtype>::Reshape(bottom, top);
60 template <typename Dtype>
61 bool ACLBatchNormLayer<Dtype>::Bypass_acl(const vector<Blob<Dtype>*>& bottom,
62 const vector<Blob<Dtype>*>& top){
63 bool bypass_acl=false;
64 if (this->force_bypass_acl_path_||!this->use_global_stats_) {
67 if (isScheduleEnable()) {
73 template <typename Dtype>
74 void ACLBatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
75 const vector<Blob<Dtype>*>& top) {
77 Forward_gpu(bottom, top);
81 logtime_util log_time(ACL_BN_INFO);
82 #endif //USE_PROFILING
83 if (Bypass_acl(bottom,top)) {
84 BatchNormLayer<Dtype>::Forward_cpu(bottom,top);
87 SetupACLOperator(bottom,top);
88 caffe::acl_run(this,bottom,top);
91 template <typename Dtype>
92 void ACLBatchNormLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
93 const vector<Blob<Dtype>*>& top) {
95 logtime_util log_time(ACL_BN_INFO);
96 #endif //USE_PROFILING
97 if (Bypass_acl(bottom,top)) {
98 BatchNormLayer<Dtype>::Forward_cpu(bottom,top);
101 SetupACLOperator(bottom,top);
102 caffe::acl_run(this,bottom,top);
105 template <typename Dtype>
106 ACLBatchNormLayer<Dtype>::~ACLBatchNormLayer() {
109 INSTANTIATE_CLASS(ACLBatchNormLayer);